Load R packages and define colour functions

library(tidyverse) ; library(reshape2) ; library(glue) ; library(plotly) ; library(dendextend)
library(RColorBrewer) ; library(viridis) ; require(gridExtra) ; library(colorspace) ; library(corrplot)
library(GGally) ; library(ggpubr) ; library(ggExtra)
library(WGCNA)
library(expss)
library(polycor)
library(biomaRt)
library(clusterProfiler) ; library(ReactomePA) ; library(DOSE) ; library(org.Hs.eg.db)
library(foreach) ; library(doParallel)
library(knitr) ; library(kableExtra) ; library(xtable)

# Select colours from a fixed 10-colour palette.
#
# r: index vector (anything valid inside `[`) into the palette defined below
#    NOTE(review): presumably r holds SFARI gene scores / categories -- confirm against callers
# returns: character vector with the selected colours
SFARI_colour_hue = function(r) {
  pal = c('#FF7631','#FFB100','#E8E328','#8CC83F','#62CCA6','#59B9C9','#b3b3b3','#808080','gray','#d9d9d9')
  # Return explicitly: a function that ends on an assignment returns its value invisibly
  return(pal[r])
}

# Get colors from the ggplot palette
#
# n: number of colours wanted (non-negative integer)
# returns: character vector with n colours, with hues evenly spaced from 15 (inclusive)
#          to 375 (exclusive), luminance 65 and chroma 100
gg_colour_hue = function(n) {
  hues = seq(15, 375, length.out = n+1)
  # seq_len() so that n = 0 gives an empty palette (1:0 is c(1, 0) and would select one colour)
  pal = hcl(h = hues, l = 65, c = 100)[seq_len(n)]
  return(pal)
}

# Assign an HCL rainbow colour to each module
#
# mods: vector (character or factor) with the module each gene belongs to
# returns: named character vector with one colour per distinct module; names are taken from
#          table(mods). The first name gets 'white', the rest get ggplot-like hues in a
#          reproducible shuffled order (fixed seed).
# NOTE(review): 'white' is presumably meant for the unassigned ('gray') module, which requires it
#               to be the first name returned by table(mods) -- confirm the level ordering
get_mod_colours = function(mods){
  
  n = length(unique(mods))-1
  # seq_len() keeps the single-module case (n = 0) well defined; sample(1:0) would shuffle c(1, 0)
  set.seed(123) ; rand_order = sample(seq_len(n))
  mod_colors = c('white', gg_colour_hue(n)[rand_order])
  names(mod_colors) = names(table(mods))
  
  return(mod_colors)
}

# Compare results from GSEA and ORA
#
# For each module in top_modules prints (as markdown, through cat and kable):
#   - the number of enriched terms found by GSEA, by ORA and by both methods
#   - a table with the shared terms ranked by p.adjust_ORA + p.adjust_GSEA
#   - the gene symbols involved in the 5 best ranked shared terms (when there are at least 5)
#   - the gene symbols involved in all the shared terms (skipped when there are exactly 5 terms,
#     since it would repeat the previous list)
#
# GSEA_list, ORA_list: lists indexed as [[module]][[database]] whose elements have a @result slot
# top_modules_enrichment: list indexed as [[module]][[database]] with the shared terms; the columns
#                         ID, Description.x, p.adjust_ORA, p.adjust_GSEA, qvalue_ORA, GeneRatio and
#                         geneID ('/'-separated entrez IDs) are used
# top_modules: names of the modules to report
# database: name of the enrichment database to report (e.g. 'GO')
#
# Relies on the global data frames genes_info (Module, module_number) and
# gene_names (entrezgene, hgnc_symbol). Called for its printed output.
compare_methods = function(GSEA_list, ORA_list, top_modules_enrichment, top_modules, database){

  # Translate a vector of entrez IDs into sorted, unique gene symbols
  get_gene_symbols = function(entrez_ids){
    gene_names %>% filter(entrezgene %in% entrez_ids) %>% pull(hgnc_symbol) %>% unique %>% sort
  }

  for(module in top_modules){
    cat(paste0('  \n  \n Enrichment results for cluster ', 
                 genes_info$module_number[genes_info$Module==module][1], ':  \n'))
    
    cat(paste0('- GSEA has ', nrow(GSEA_list[[module]][[database]]@result), ' enriched term(s)  \n'))
    cat(paste0('- ORA has  ', nrow(ORA_list[[module]][[database]]@result), ' enriched term(s)  \n'))
    cat(paste0('- ', nrow(top_modules_enrichment[[module]][[database]]), 
               ' terms are enriched in both methods  \n  \n'))

    # Rank the shared terms once, so the table and the 'top 5' gene list use the same ordering
    ranked_terms = top_modules_enrichment[[module]][[database]] %>%
                   mutate(pval_mean = p.adjust_ORA + p.adjust_GSEA) %>% arrange(pval_mean)
    
    enriched_terms = ranked_terms %>%
                     dplyr::select(ID, Description.x, p.adjust_ORA, p.adjust_GSEA, qvalue_ORA, GeneRatio) %>%
                     dplyr::rename('Description' = Description.x)
    n_terms = nrow(enriched_terms)
    
    if(n_terms>0){
      print(enriched_terms %>% kable %>% kable_styling(full_width = FALSE))
      
      ##########################################################################################################
      # Get genes involved
      genes_by_term = strsplit(ranked_terms %>% pull(geneID), '/')
      
      if(n_terms >= 5){
        top_genes = genes_by_term[1:5] %>% unlist %>% unique
        cat(paste0('Genes involved in top 5 enriched terms: ',
                   paste(get_gene_symbols(top_genes), collapse = ', '),'\n'))
      }
      
      # Compare against the number of terms: a loop counter checked after the loop is already one
      # past the last row, which silently skipped this list for modules with exactly 4 terms
      if(n_terms != 5){
        all_genes = genes_by_term %>% unlist %>% unique
        cat(paste0('\nGenes involved in all enriched terms: ', 
                   paste(get_gene_symbols(all_genes), collapse = ', ')))  
      }
      ##########################################################################################################
    }
    
  }
}

# Interactive scatter plots comparing the corrected p-values of GSEA and ORA for the shared terms
#
# top_modules_enrichment: list indexed as [[module]][[database]] with the shared terms; the columns
#                         Description.x, p.adjust_GSEA, p.adjust_ORA and NES are used
# top_modules: names of the modules to plot
# database: name of the enrichment database to plot (e.g. 'GO')
# returns: htmltools tagList with one plotly object per module with more than 5 shared terms,
#          stored at the position the module has in top_modules (other positions are left unset)
#
# Relies on the global data frame genes_info (Module, module_number) for the plot titles.
plot_results = function(top_modules_enrichment, top_modules, database){
  
  l = htmltools::tagList()

  # seq_along() so that an empty top_modules returns an empty tagList instead of failing on 1:0
  for(i in seq_along(top_modules)){
    
    plot_data = top_modules_enrichment[[top_modules[i]]][[database]] %>%
                dplyr::rename('Description' = Description.x)
    
    if(nrow(plot_data)>5){
      # Shared limits for both axes; the upper limit always includes the 0.05 reference lines
      min_val = min(plot_data$p.adjust_GSEA, plot_data$p.adjust_ORA)
      max_val = max(plot_data$p.adjust_GSEA, plot_data$p.adjust_ORA, 0.05)
      cluster_number = genes_info$module_number[genes_info$Module==top_modules[i]][1]
      
      ggp = ggplotly(plot_data %>% ggplot(aes(p.adjust_GSEA, p.adjust_ORA, color = NES)) + 
                     geom_point(aes(id = Description)) + 
                     geom_vline(xintercept = 0.05, color = 'gray', linetype = 'dotted') + 
                     geom_hline(yintercept = 0.05, color = 'gray', linetype = 'dotted') + 
                     ggtitle(paste0('Enriched terms in common for cluster ', cluster_number)) +
                     scale_x_continuous(limits = c(min_val, max_val)) + 
                     scale_y_continuous(limits = c(min_val, max_val)) + 
                     xlab('Corrected p-value for GSEA') + ylab('Corrected p-value for ORA') +
                     scale_colour_viridis(direction = -1) + theme_minimal() + coord_fixed())
      l[[i]] = ggp
    }
  }
  
  return(l)
}


# plot_shared_genes(top_modules_enrichment, top_modules, 'GO')
#
# Draws, for each module with at least 2 shared terms, a corrplot with the proportion of genes
# shared (intersection over union) between every pair of its best ranked shared terms (at most 5,
# ranked by p.adjust_ORA + p.adjust_GSEA).
#
# top_modules_enrichment: list indexed as [[module]][[database]] with the shared terms; the columns
#                         ID, geneID ('/'-separated gene IDs), p.adjust_ORA and p.adjust_GSEA are used
# top_modules: names of the modules to plot
# database: name of the enrichment database to plot (e.g. 'GO')
#
# Relies on the global data frame genes_info (Module, module_number) for the plot titles.
# Called for its plots.
plot_shared_genes = function(top_modules_enrichment, top_modules, database){

  # seq_along() so that an empty top_modules is a no-op instead of failing on 1:0
  for(tm in seq_along(top_modules)){
    
    plot_data = top_modules_enrichment[[top_modules[tm]]][[database]] %>% 
                mutate(pval_mean = p.adjust_ORA + p.adjust_GSEA) %>% arrange(pval_mean) %>% 
                dplyr::select(ID, geneID)
    
    if(nrow(plot_data)>=2){

      plot_data = plot_data %>% slice_head(n=5)
      n_terms = nrow(plot_data)
      
      # Split each gene list once instead of on every pairwise comparison
      gene_sets = strsplit(plot_data$geneID, '/')
    
      shared_genes = matrix(0, n_terms, n_terms)
      for(i in seq_len(n_terms-1)){
        for(j in (i+1):n_terms){
          # Set operations, so a gene repeated within a list cannot inflate the ratio
          shared_genes[i,j] = length(intersect(gene_sets[[i]], gene_sets[[j]])) / 
                              length(union(gene_sets[[i]], gene_sets[[j]]))
          shared_genes[j,i] = shared_genes[i,j]
        }
      }
      rownames(shared_genes) = plot_data$ID
      colnames(shared_genes) = plot_data$ID
  
      corrplot(shared_genes, type = 'lower', method = 'square', diag = FALSE, number.digits = 2, cl.pos = 'n', 
               tl.pos = 'ld', tl.col = '#666666', order = 'hclust', col.lim = c(0,1), addCoef.col = 'black',
               mar = c(0,0,2,0), tl.cex = 0.8, number.cex= 0.8,
               title = paste0('Genes in common for top terms in cluster ',
                              genes_info$module_number[genes_info$Module==top_modules[tm]][1]))
    }
  }
}

# Print table with top results (for annex in thesis)
#
# top_modules_enrichment: list indexed as [[module]][[database]] with the shared enriched terms
# module, database: which element of the list to print
# n: number of terms to keep (the ones with the lowest p.adjust_ORA + p.adjust_GSEA)
# returns: whatever print() returns for the xtable object; the table itself is printed
#          without row names
print_table_w_top_results = function(top_modules_enrichment, module, database, n){
  
  shared_terms = top_modules_enrichment[[module]][[database]]
  
  # Rank the terms by the sum of both corrected p-values and keep the n lowest
  ranked_terms = shared_terms %>% mutate(pval_mean = p.adjust_ORA + p.adjust_GSEA) %>% arrange(pval_mean)
  top_terms = ranked_terms %>% top_n(-n, wt=pval_mean) %>% dplyr::rename('Description' = Description.x) %>%
              dplyr::select(ID, Description, p.adjust_GSEA, p.adjust_ORA, NES, GeneRatio)
  
  # One display code per column plus a leading one for the row names
  enriched_terms = xtable(top_terms, display =c('s','s','s','e','e','f','s'))

  return(print(enriched_terms, include.rownames=FALSE))
}

#print_table_w_top_results(selected_modules_enrichment, names(selected_modules_enrichment)[2], 'DN', 5)

Load preprocessed dataset (code in 2.1.Preprocessing_pipeline.Rmd)

# SFARI Genes
SFARI_genes = read_csv('./../../SFARI/Data/SFARI_genes_01-03-2020_w_ensembl_IDs.csv')

# Load Gandal dataset
# NOTE(review): presumably this .RData provides datExpr, datGenes, genes_info and dds, which are
# used or removed below without being created in this chunk -- confirm
load('./../Data/preprocessedData/preprocessed_data.RData')
datExpr = datExpr %>% data.frame

# WGCNA metrics
WGCNA_metrics = read.csv('./../Data/preprocessedData/WGCNA_metrics.csv')

# Updates genes_info with SFARI information and clusters
# Only the columns listed in select() survive the first two joins; every WGCNA_metrics column
# whose name contains 'pval' is dropped at the end
genes_info = genes_info %>% left_join(SFARI_genes, by = 'ID') %>% 
             left_join(datGenes %>% mutate(ID = rownames(.)) %>% dplyr::select(ID, hgnc_symbol), by = 'ID') %>%
             dplyr::select(ID, hgnc_symbol, log2FoldChange, shrunken_log2FoldChange, significant, Neuronal) %>%
             left_join(WGCNA_metrics, by = 'ID') %>% dplyr::select(-contains('pval'))


################################################################################################################
# Get Entrez gene ID of genes (genes in the 'gray' module are left out)
gene_names = genes_info %>% dplyr::rename('ensembl_gene_id' = ID) %>% filter(Module!='gray')
  
# clusterProfiler works with Entrez Gene IDs, so we have to assign one to each gene
# The query goes to the Ensembl archive host given below, so it needs network access
getinfo = c('ensembl_gene_id','entrezgene')
mart=useMart(biomart='ENSEMBL_MART_ENSEMBL',dataset='hsapiens_gene_ensembl',host='feb2014.archive.ensembl.org')
biomart_output = getBM(attributes=getinfo, filters=c('ensembl_gene_id'), 
                       values=gene_names$ensembl_gene_id, mart=mart)

# gene_names ends up with the columns ID (ensembl ID), entrezgene and hgnc_symbol, with one row per
# row returned by biomaRt
gene_names = biomart_output %>% left_join(gene_names %>% dplyr::select(ensembl_gene_id, hgnc_symbol), 
                                          by='ensembl_gene_id') %>% dplyr::rename('ID'=ensembl_gene_id)

rm(getinfo, mart, biomart_output)



# Remove objects that are not used again in this chunk
rm(dds, WGCNA_metrics)




Methodology

Both GSEA and ORA are commonly used to study enrichment in sets of genes, but when using them for studying our modules both have shortcomings:

# Use as example the module of the first gene in genes_info with |MTcor| > 0.9
# NOTE(review): MTcor is presumably the module-diagnosis correlation -- confirm
module = genes_info %>% filter(abs(MTcor) > 0.9) %>% slice_head(n=1) %>% pull(Module) %>% as.character
example_mod_hex = gsub('#','',module)
example_cluster_no = genes_info$module_number[genes_info$Module==module][1]

# Membership of every gene to the example module, flagging the genes assigned to it
plot_data = genes_info %>% dplyr::select(Module, paste0('MM.', example_mod_hex))
colnames(plot_data)[2] = 'MM'
plot_data = plot_data %>% 
            mutate(in_module = substring(Module,2) == example_mod_hex, 
                   selected_module = paste('Cluster', as.character(example_cluster_no)),
                   alpha = ifelse(in_module, 0.8, 0.1))

# Jittered strip of membership values; genes outside the module are drawn almost transparent
p = ggplot(plot_data, aes(selected_module, MM, color = in_module)) + 
    geom_jitter(alpha = plot_data$alpha) + 
    coord_flip() + 
    theme_minimal() + 
    theme(legend.position = 'bottom', axis.text.y = element_blank(), axis.ticks.y = element_blank()) + 
    labs(x = '', 
         y = paste('Cluster membership to cluster', example_cluster_no), 
         color = paste('Gene belongs to cluster', example_cluster_no))

# Add the density of the membership values of each group along the membership axis
ggExtra::ggMarginal(p, type = 'density', groupColour = TRUE, groupFill = TRUE, margins = 'x', size=1)

# NOTE(review): `modules` is not created in this chunk; rm() only warns if it does not exist -- confirm
rm(modules, module, p, plot_data)
rm(example_mod_hex, example_cluster_no)

So it could be useful to combine the two methods, since each seems to compensate for the other's shortcomings: we perform the enrichment with both methods and keep only the terms that are found to be enriched by both.

Note: Since requiring enrichment in both methods is quite a strict restriction, we decided to relax the corrected p-value threshold (using Bonferroni correction) to 0.1.


Perform Enrichment Analysis

Note: This script may take a while to run (~30 mins on a laptop with an 8 core Intel(R) Core(TM) i5-8400H CPU @ 2.50GHz). Sometimes there are problems with the API and the process freezes or is killed after printing ‘error writing to connection’; whenever this has happened, it has been fixed in less than a day (except once, when it took 4 days…).

# Modules (identified by their hex colour) for which the enrichment analysis is performed
top_modules = c('#44A0FF', '#D177FF', '#F47B5B', '#00BADE', '#64B200', '#DD71FA')

# If the final result exists, load it together with the GSEA and ORA results it was built from.
# Otherwise run GSEA and ORA for every module on five databases (GO, DO, DGN, KEGG, Reactome) and
# keep, for each module and database, the terms that are enriched in both methods.
if(file.exists('./../Data/preprocessedData/top_modules_enrichment.RData')){
  load('./../Data/preprocessedData/top_modules_enrichment.RData')
  load('./../Data/preprocessedData/GSEA_results.RData')
  load('./../Data/preprocessedData/ORA_results.RData')
} else{
    
  ################################################################################################################
  # Prepare dataset for Enrichment Analysis
  
  EA_dataset = genes_info %>% dplyr::rename('ensembl_gene_id' = ID) %>% filter(Module!='gray')
  
  # clusterProfiler works with Entrez Gene IDs, so we have to assign one to each gene
  getinfo = c('ensembl_gene_id','entrezgene')
  mart=useMart(biomart='ENSEMBL_MART_ENSEMBL',dataset='hsapiens_gene_ensembl',host='feb2014.archive.ensembl.org')
  biomart_output = getBM(attributes=getinfo, filters=c('ensembl_gene_id'), 
                         values=EA_dataset$ensembl_gene_id, mart=mart)
  
  EA_dataset = biomart_output %>% left_join(EA_dataset, by='ensembl_gene_id') %>% dplyr::rename('ID'=ensembl_gene_id)
  
  rm(getinfo, mart, biomart_output)
  
  ################################################################################################################
  # GSEA enrichment
  
  file_name = './../Data/preprocessedData/GSEA_results.RData'
  
  # The results are saved after each module, so an existing file may come from an interrupted run 
  # and contain only some of the modules: load it and compute only the modules that are missing
  GSEA_enrichment = list()
  if(file.exists(file_name)) load(file_name)
  pending_modules = setdiff(top_modules, names(GSEA_enrichment))
  
  if(length(pending_modules) > 0){
    nPerm = 1e5
    GSEA_dataset = EA_dataset %>% dplyr::select(ID, entrezgene, contains('MM.'))
    
    for(module in pending_modules){
      
      cat(paste0('\nModule: ', which(top_modules == module), '/', length(top_modules)))
      
      # Genes ranked by their membership (MM) to the module, named by entrez ID
      geneList = GSEA_dataset %>% pull(paste0('MM.',substring(module,2)))
      names(geneList) = GSEA_dataset %>% pull(entrezgene) %>% as.character
      geneList = sort(geneList, decreasing = TRUE)
      
      GSEA_GO = gseGO(geneList, OrgDb = org.Hs.eg.db, pAdjustMethod = 'bonferroni', pvalueCutoff = 0.1, 
                      nPerm = nPerm, verbose = FALSE, seed = TRUE)
      
      GSEA_DO = gseDO(geneList, pAdjustMethod = 'bonferroni', pvalueCutoff = 0.1, 
                      nPerm = nPerm, verbose = FALSE, seed = TRUE)
      
      GSEA_DGN = gseDGN(geneList, pAdjustMethod = 'bonferroni', pvalueCutoff = 0.1, 
                        nPerm = nPerm, verbose = FALSE, seed = TRUE)
      
      GSEA_KEGG = gseKEGG(geneList, organism = 'human', pAdjustMethod = 'bonferroni', pvalueCutoff = 0.1, 
                          nPerm = nPerm, verbose = FALSE, seed = TRUE)
      
      GSEA_Reactome = gsePathway(geneList, organism = 'human', pAdjustMethod = 'bonferroni', pvalueCutoff = 0.1, 
                                 nPerm = nPerm, verbose = FALSE, seed = TRUE)
      
      GSEA_enrichment[[module]] = list('GO' = GSEA_GO, 'DO' = GSEA_DO, 'DGN' = GSEA_DGN, 'KEGG' = GSEA_KEGG, 
                                       'Reactome' = GSEA_Reactome)
      
      # Save after each iteration (in case it breaks)
      save(GSEA_enrichment, file = file_name)
    }
    
    rm(GSEA_dataset, nPerm, geneList, GSEA_GO, GSEA_DO, GSEA_DGN, GSEA_KEGG, GSEA_Reactome)
    
  }
  
  rm(pending_modules)
  
  ################################################################################################################
  # ORA enrichment
  
  file_name = './../Data/preprocessedData/ORA_results.RData'
  
  # Same resume logic as for GSEA: only the modules missing from the saved results are computed
  ORA_enrichment = list()
  if(file.exists(file_name)) load(file_name)
  pending_modules = setdiff(top_modules, names(ORA_enrichment))
  
  if(length(pending_modules) > 0){
    # Prepare input: the background is every gene kept in EA_dataset
    universe = EA_dataset$entrezgene %>% as.character
    
    # Perform Enrichment
    for(module in pending_modules){
      
      genes_in_module = EA_dataset %>% filter(Module == module) %>% pull(entrezgene)
      
      ORA_GO = enrichGO(gene = genes_in_module, universe = universe, OrgDb = org.Hs.eg.db, ont = 'All', 
                        pAdjustMethod = 'bonferroni', pvalueCutoff = 0.1, qvalueCutoff = 1)
      
      ORA_DO = enrichDO(gene = genes_in_module, universe = universe, qvalueCutoff = 1,
                        pAdjustMethod = 'bonferroni', pvalueCutoff = 0.1)
      
      ORA_DGN = enrichDGN(gene = genes_in_module, universe = universe, qvalueCutoff = 1,
                          pAdjustMethod = 'bonferroni', pvalueCutoff = 0.1)
      
      ORA_KEGG = enrichKEGG(gene = genes_in_module, universe = universe, qvalueCutoff = 1,
                            pAdjustMethod = 'bonferroni', pvalueCutoff = 0.1) 
      
      ORA_Reactome = enrichPathway(gene = genes_in_module, universe = universe, qvalueCutoff = 1,
                                   pAdjustMethod = 'bonferroni', pvalueCutoff = 0.1)
      
      ORA_enrichment[[module]] = list('GO' = ORA_GO, 'DO' = ORA_DO, 'DGN' = ORA_DGN, 'KEGG' = ORA_KEGG, 
                                      'Reactome' = ORA_Reactome)
      
      # Save after each iteration
      save(ORA_enrichment, file = file_name)
    }
    
    rm(universe, genes_in_module, module, ORA_GO, ORA_DGN, ORA_DO, ORA_KEGG, ORA_Reactome)
  
  }
  
  rm(pending_modules)
  
  ################################################################################################################
  # Get shared enrichment for each module
  
  top_modules_enrichment = list()
  
  for(module in top_modules){
    
    module_enrichment = list()
    GSEA_enrichment_for_module = GSEA_enrichment[[module]]
    ORA_enrichment_for_module = ORA_enrichment[[module]]
    
    for(dataset in c('KEGG', 'Reactome', 'GO', 'DO', 'DGN')){
      
      # Tag the p-value columns with the method they come from before joining
      GSEA_enrichment_dataset = GSEA_enrichment_for_module[[dataset]] %>% data.frame %>%
        dplyr::rename('pvalue_GSEA' = pvalue, 'p.adjust_GSEA' = p.adjust, 'qvalues_GSEA' = qvalues)
      
      ORA_enrichment_dataset = ORA_enrichment_for_module[[dataset]] %>% data.frame %>%
        dplyr::rename('pvalue_ORA' = pvalue, 'p.adjust_ORA' = p.adjust, 'qvalue_ORA' = qvalue)
      
      # Get shared enrichments (if any)
      shared_enrichment_dataset = GSEA_enrichment_dataset %>% inner_join(ORA_enrichment_dataset, by = 'ID')
      
      module_enrichment[[dataset]] = shared_enrichment_dataset
    }
    
    top_modules_enrichment[[module]] = module_enrichment  
  }
  
  save(top_modules_enrichment, file = './../Data/preprocessedData/top_modules_enrichment.RData')
  
  rm(module, module_enrichment, GSEA_enrichment_for_module, ORA_enrichment_for_module, dataset, 
     GSEA_enrichment_dataset, ORA_enrichment_dataset, shared_enrichment_dataset)
}


3.4.1 Top clusters by cluster-diagnosis correlation


# Modules analysed in this section: the first three entries of top_modules
top_modules_mtcor = top_modules[1:3]


Gene Ontology

# Print the GSEA vs ORA comparison for the 'GO' results of the modules in top_modules_mtcor
compare_methods(GSEA_enrichment, ORA_enrichment, top_modules_enrichment, top_modules_mtcor, 'GO')

Enrichment results for cluster 20:
- GSEA has 36 enriched term(s)
- ORA has 1 enriched term(s)
- 0 terms are enriched in both methods

Enrichment results for cluster 36:
- GSEA has 316 enriched term(s)
- ORA has 39 enriched term(s)
- 18 terms are enriched in both methods

ID Description p.adjust_ORA p.adjust_GSEA qvalue_ORA GeneRatio
GO:0048514 blood vessel morphogenesis 0.0000006 0.0805837 0.0000004 50/517
GO:0042060 wound healing 0.0000363 0.0813934 0.0000048 44/517
GO:0001525 angiogenesis 0.0000023 0.0822846 0.0000007 44/517
GO:0006333 chromatin assembly or disassembly 0.0016454 0.0922041 0.0001126 21/517
GO:0031497 chromatin assembly 0.0004459 0.0935875 0.0000397 20/517
GO:0034728 nucleosome organization 0.0014996 0.0927348 0.0001112 20/517
GO:0006334 nucleosome assembly 0.0000482 0.0946569 0.0000048 20/517
GO:0030219 megakaryocyte differentiation 0.0008059 0.0966219 0.0000652 16/517
GO:0009617 response to bacterium 0.0230690 0.0824462 0.0011402 35/517
GO:0006323 DNA packaging 0.0162955 0.0914236 0.0009061 20/517
GO:0050865 regulation of cell activation 0.0275775 0.0816087 0.0012913 37/517
GO:0045652 regulation of megakaryocyte differentiation 0.0130287 0.0979723 0.0007727 13/517
GO:1903706 regulation of hemopoiesis 0.0427370 0.0826343 0.0019010 34/517
GO:0030099 myeloid cell differentiation 0.0465264 0.0839342 0.0019710 31/517
GO:0060964 regulation of gene silencing by miRNA 0.0594324 0.0949961 0.0022988 15/517
GO:1904035 regulation of epithelial cell apoptotic process 0.0577392 0.0992782 0.0022988 11/517
GO:0060147 regulation of posttranscriptional gene silencing 0.0740821 0.0949420 0.0025348 15/517
GO:0060966 regulation of gene silencing by RNA 0.0740821 0.0949420 0.0025348 15/517

Genes involved in top 5 enriched terms: ABL1, ACP5, ADAM8, ANPEP, ATP11C, B4GALT1, BANK1, BIN3, BTG1, C5AR1, CCL2, CD274, CD55, CDK19, CDKN1A, CELSR1, CFLAR, CLCF1, COL4A1, COL4A2, COL8A2, CPQ, CTGF, CUBN, CXCL2, CYR61, DOCK6, EIF2AK3, EMILIN1, ENPP3, EPPK1, ESM1, EYA1, FOS, GCLC, GRAP2, GSN, HBEGF, HIF3A, HIST1H2BC, HIST1H2BE, HIST1H2BF, HIST1H2BG, HIST1H2BI, HIST1H3A, HIST1H3B, HIST1H3C, HIST1H3D, HIST1H3E, HIST1H3F, HIST1H3H, HIST1H3I, HIST1H3J, HSPB1, IFRD1, IL6, IL7, IL7R, IL8, INHBA, ITGA5, JUNB, LAMA5, LEMD3, LOXL3, LRP5, LYST, MAFF, MAFK, MAPK7, MED1, MMP14, MMP2, MT2A, MTDH, MYH9, MYLK, MYOF, MYOZ1, MZB1, NOD2, NOTCH3, NR4A1, PDE4B, PELI1, PHLDB2, PLAU, PLAUR, PNP, PRRX1, PTGER4, PTGS2, PTPN22, RAPGEF2, RBM15, RCOR1, RELA, RIPK2, RIPK3, RREB1, RUNX3, SAT1, SEMA5A, SERPINE1, SGMS1, SH2B3, SH2D2A, SHB, SLC7A11, SPHK1, TAL1, TFPI2, TGFBI, THBS1, TIGIT, TIMP1, TLN1, TNFRSF12A, TNFSF8, TRAF6, UNC13B, VASH1, ZC3H12A

Genes involved in all enriched terms: ABL1, ACP5, ADAM8, AGO4, ANPEP, ATP11C, B4GALT1, BANK1, BIN3, BTG1, C5AR1, CCL2, CD248, CD274, CD55, CDK19, CDKN1A, CELSR1, CFLAR, CLCF1, COL4A1, COL4A2, COL8A2, CPQ, CTGF, CUBN, CXCL2, CYR61, DDX5, DOCK6, DYRK3, EIF2AK3, EMILIN1, ENPP3, EPPK1, ESM1, EYA1, FOS, FSTL3, GCLC, GRAP2, GSN, HAT1, HBEGF, HIF3A, HIST1H1D, HIST1H2BC, HIST1H2BE, HIST1H2BF, HIST1H2BG, HIST1H2BI, HIST1H3A, HIST1H3B, HIST1H3C, HIST1H3D, HIST1H3E, HIST1H3F, HIST1H3H, HIST1H3I, HIST1H3J, HP1BP3, HSPB1, IFRD1, IL6, IL7, IL7R, IL8, INHBA, ITGA5, JUNB, KAT6A, LAMA5, LEMD3, LIF, LOXL3, LRP5, LTBR, LYST, MAFF, MAFK, MAPK7, MED1, MMP14, MMP2, MT2A, MTDH, MYH9, MYLK, MYOF, MYOZ1, MZB1, NOD2, NOTCH3, NR4A1, NRROS, NUP188, NUP98, NUPR1, OSM, PDE4B, PELI1, PHLDB2, PLAU, PLAUR, PNP, PRRX1, PSMC1, PTGER4, PTGS2, PTPN22, RANBP2, RAPGEF2, RBM15, RCOR1, RELA, RIPK2, RIPK3, RREB1, RUNX3, SAT1, SEMA5A, SERPINE1, SGMS1, SH2B3, SH2D2A, SHB, SLC7A11, SOX6, SPHK1, SPTY2D1, TAL1, TCF12, TCIRG1, TET2, TFPI2, TGFBI, THBS1, TIGIT, TIMP1, TLN1, TMOD3, TNFRSF12A, TNFSF8, TRAF6, UNC13B, VASH1, ZC3H12A

Enrichment results for cluster 45:
- GSEA has 0 enriched term(s)
- ORA has 73 enriched term(s)
- 0 terms are enriched in both methods


Plots of the results when there are more than 5 terms in common between methods:

# p-value scatter plots and gene-overlap plots for the shared 'GO' terms
plot_results(top_modules_enrichment, top_modules_mtcor, 'GO')
plot_shared_genes(top_modules_enrichment, top_modules_mtcor, 'GO')


Disease Ontology

# Print the GSEA vs ORA comparison for the 'DO' results of the modules in top_modules_mtcor
compare_methods(GSEA_enrichment, ORA_enrichment, top_modules_enrichment, top_modules_mtcor, 'DO')

Enrichment results for cluster 20:
- GSEA has 77 enriched term(s)
- ORA has 495 enriched term(s)
- 0 terms are enriched in both methods

Enrichment results for cluster 36:
- GSEA has 206 enriched term(s)
- ORA has 670 enriched term(s)
- 15 terms are enriched in both methods

ID Description p.adjust_ORA p.adjust_GSEA qvalue_ORA GeneRatio
DOID:7148 rheumatoid arthritis 0.0000009 0.0110111 0.0000006 48/284
DOID:2394 ovarian cancer 0.0008573 0.0116314 0.0001688 31/284
DOID:289 endometriosis 0.0000344 0.0131491 0.0000116 16/284
DOID:2151 malignant ovarian surface epithelial-stromal neoplasm 0.0014993 0.0117612 0.0001688 29/284
DOID:2152 ovary epithelial cancer 0.0014993 0.0117612 0.0001688 29/284
DOID:4001 ovarian carcinoma 0.0014993 0.0117612 0.0001688 29/284
DOID:120 female reproductive organ cancer 0.0137553 0.0111429 0.0013275 36/284
DOID:552 pneumonia 0.0213572 0.0130622 0.0017746 13/284
DOID:170 endocrine gland cancer 0.0365661 0.0109738 0.0019002 38/284
DOID:3070 malignant glioma 0.0236419 0.0242302 0.0017746 22/284
DOID:345 uterine disease 0.0316550 0.0272338 0.0019002 9/284
DOID:229 female reproductive system disease 0.0359228 0.0248975 0.0019002 18/284
DOID:288 endometriosis of uterus 0.0410068 0.0288509 0.0019788 5/284
DOID:3905 lung carcinoma 0.0703615 0.0109932 0.0031689 37/284
DOID:5041 esophageal cancer 0.0926079 0.0125777 0.0039102 16/284

Genes involved in top 5 enriched terms: ABCC3, ACP5, ADAM8, ANPEP, B4GALT1, BANK1, C5AR1, CASP4, CCL2, CCNL1, CD274, CD55, CDKN1A, CFLAR, CTGF, CTSK, CXCL1, CXCL14, CYR61, DNM2, EFEMP2, EIF4EBP3, ERC1, FOS, FOSL1, GRAP2, GSN, HBEGF, HSPB1, IKBKE, IL1RN, IL6, IL7, IL7R, IL8, INHBA, ITGA5, KLF6, KLRC1, LATS2, LHCGR, LIF, LRP5, LTBR, MAP3K2, MCL1, MED1, MFF, MMP14, MMP2, MTDH, MTRR, NAMPT, NFATC1, NOD2, NOTCH3, NUPR1, OSM, PAX8, PLA2G4A, PLAU, PLAUR, PNP, PTGER4, PTGS2, PTPN13, PTPN22, PTPRJ, PXN, RASSF1, RCVRN, RELA, RUNX3, SAT1, SEMA3C, SEMA5A, SERPINE1, SH2D2A, SPHK1, TFPI2, TGFBI, THBS1, TIAM1, TIMP1, TNFRSF10B, TNFRSF10D, TNFRSF12A, TNFSF8, U2AF1, VEGFC, YBX1

Genes involved in all enriched terms: ABCC3, ACP5, ADAM8, AHRR, ANPEP, AXIN1, B4GALT1, BANK1, C5AR1, CASP4, CCL2, CCNL1, CD274, CD55, CDKN1A, CFLAR, CTGF, CTSK, CXCL1, CXCL14, CYR61, DNM2, EFEMP2, EIF4EBP3, ERC1, FOS, FOSL1, FSTL3, GRAP2, GSN, HBEGF, HSPB1, IKBKE, IL1RN, IL6, IL7, IL7R, IL8, INHBA, ITGA5, KLF6, KLRC1, LAMA5, LAMC1, LATS2, LHCGR, LIF, LRP5, LTBR, MAP3K2, MCL1, MED1, MFF, MMP14, MMP2, MT2A, MTDH, MTRR, MYLK, NAMPT, NFATC1, NOD2, NOTCH3, NUPR1, OSM, PAX8, PLA2G4A, PLAU, PLAUR, PNP, PTGER4, PTGS2, PTPN13, PTPN22, PTPRJ, PXN, RASSF1, RCVRN, RELA, RFX1, RUNX3, SAT1, SEMA3C, SEMA5A, SERPINE1, SH2D2A, SPHK1, TFPI2, TGFBI, THBS1, TIAM1, TIMP1, TNFRSF10B, TNFRSF10D, TNFRSF12A, TNFSF8, U2AF1, VEGFC, YBX1

Enrichment results for cluster 45:
- GSEA has 237 enriched term(s)
- ORA has 417 enriched term(s)
- 1 terms are enriched in both methods

ID Description p.adjust_ORA p.adjust_GSEA qvalue_ORA GeneRatio
DOID:0060100 musculoskeletal system cancer 0.0341421 0.0145544 0.0146455 12/54

Genes involved in all enriched terms: ACKR3, AR, BGLAP, CD163, CDH3, FOXO1, GSTP1, ICAM1, MAP3K5, SERPINF1, YAP1, ZIC1

Plots of the results when there are more than 5 terms in common between methods:

# p-value scatter plots and gene-overlap plots for the shared 'DO' terms
plot_results(top_modules_enrichment, top_modules_mtcor, 'DO')
plot_shared_genes(top_modules_enrichment, top_modules_mtcor, 'DO')


Disease Gene Network

# Print the GSEA vs ORA comparison for the 'DGN' results of the modules in top_modules_mtcor
compare_methods(GSEA_enrichment, ORA_enrichment, top_modules_enrichment, top_modules_mtcor, 'DGN')

Enrichment results for cluster 20:
- GSEA has 40 enriched term(s)
- ORA has 1957 enriched term(s)
- 0 terms are enriched in both methods

Enrichment results for cluster 36:
- GSEA has 239 enriched term(s)
- ORA has 2566 enriched term(s)
- 20 terms are enriched in both methods

ID Description p.adjust_ORA p.adjust_GSEA qvalue_ORA GeneRatio
umls:C0003864 Arthritis 0.0010858 0.0482080 0.0001500 40/498
umls:C0032285 Pneumonia 0.0000204 0.0500942 0.0000085 36/498
umls:C3714514 Infection 0.0033328 0.0475979 0.0002893 41/498
umls:C0024115 Lung diseases 0.0003938 0.0506939 0.0000816 31/498
umls:C1519670 Tumor Angiogenesis 0.0022508 0.0492427 0.0002331 35/498
umls:C3495559 Juvenile arthritis 0.0009317 0.0508547 0.0001500 30/498
umls:C0015944 Fetal Membranes, Premature Rupture 0.0022080 0.0538436 0.0002331 21/498
umls:C0333307 Superficial ulcer 0.0001155 0.0561457 0.0000319 18/498
umls:C2919032 Infection of amniotic sac and membranes, unspecified, unspecified trimester, not applicable or unspecified 0.0046841 0.0539614 0.0003235 20/498
umls:C2986658 Diffuse Intrinsic Pontine Glioma 0.0000000 0.0618375 0.0000000 11/498
umls:C0013537 Eclampsia 0.0089283 0.0542816 0.0005679 19/498
umls:C0008495 Chorioamnionitis 0.0095944 0.0537997 0.0005679 20/498
umls:C0011644 Scleroderma 0.0123621 0.0546344 0.0006829 18/498
umls:C0022876 Premature Obstetric Labor 0.0187841 0.0535026 0.0009728 20/498
umls:C0003486 Aortic Aneurysm 0.0247327 0.0534035 0.0011386 20/498
umls:C0042133 Uterine Fibroids 0.0369282 0.0506654 0.0016106 27/498
umls:C0042373 Vascular Diseases 0.0434200 0.0497743 0.0017133 30/498
umls:C0151526 Premature Birth 0.0549919 0.0516364 0.0018228 24/498
umls:C0004623 Bacterial Infections 0.0814674 0.0530878 0.0024110 20/498
umls:C1335302 Pancreatic Ductal Adenocarcinoma 0.0996878 0.0476237 0.0027848 37/498

Genes involved in top 5 enriched terms: ABL1, ACP5, ADAM8, ANPEP, AOC2, APOBEC3C, APOBEC3F, BIRC3, BTN3A2, BTN3A3, C5AR1, CCL2, CD248, CD274, CD55, CDKN1A, CFB, CHI3L2, COL4A1, CTGF, CTSK, CXCL1, CYP4Z1, CYR61, DNM2, DOCK6, DUSP2, DUSP5, EIF2AK3, ELF1, ELF4, EPHA3, ESM1, FBXO32, FCGRT, FLVCR2, FNDC3A, FOS, FOSB, FOXL1, GBP1, GRAP2, HBEGF, HSPB1, IFNGR2, IKBKE, IL1RN, IL6, IL7, IL8, ITGA5, JUNB, KLF6, KLRC1, LAMA5, LIF, LIPC, LTBR, MAPK7, MKNK2, MMP14, MMP2, MTDH, MTRR, MYLK, NAMPT, NFATC1, NOD2, NOTCH3, NR4A1, NR4A2, NUPR1, OSM, PLA2G4A, PLAU, PLAUR, PTBP1, PTGS2, PTPN22, PTPRJ, PXN, RASSF1, RELA, RGL2, RIPK2, RNF19A, RUNX3, SERPINE1, SH2B3, SIGLEC7, SLC16A3, SPHK1, TFPI2, THBS1, TIMP1, TNFAIP6, TNFRSF10B, TNFRSF10D, TNNC1, TRAF6, TTLL4, VASH1, VEGFC, YBX1, ZHX2, ZNRD1

Genes involved in all enriched terms: ABCC3, ABL1, ACP5, ADAM8, AFF4, ANPEP, AOC2, APOBEC3C, APOBEC3F, BANK1, BIRC3, BMP1, BTG1, BTG3, BTN3A2, BTN3A3, C2orf88, C5AR1, CALCR, CCL2, CD248, CD274, CD55, CDK19, CDKN1A, CFB, CFLAR, CHI3L2, COL4A1, COL4A2, CREM, CTGF, CTSK, CXCL1, CXCL14, CXCL2, CYP4Z1, CYR61, DNM2, DOCK6, DUSP2, DUSP5, EIF2AK3, ELF1, ELF4, EMP1, EPHA3, EPHX3, ESM1, FBXO32, FCGRT, FLVCR2, FNDC3A, FOS, FOSB, FOSL1, FOXL1, FSTL3, FUT7, GAL3ST1, GBP1, GCLC, GEM, GHR, GPRC5A, GRAP2, GSN, HBEGF, HIST1H2BC, HIST1H2BE, HIST1H2BF, HIST1H2BG, HIST1H2BI, HIST1H3A, HIST1H3B, HIST1H3C, HIST1H3D, HIST1H3E, HIST1H3F, HIST1H3H, HIST1H3I, HIST1H3J, HSPB1, IFNGR2, IFRD1, IKBKE, IL1RN, IL6, IL7, IL7R, IL8, INHBA, INTS12, ITGA5, JUNB, KAT6A, KCNQ1, KLF6, KLRC1, LAMA5, LHCGR, LIF, LIPC, LRP5, LSP1, LTBP1, LTBR, MAFF, MAPK7, MCL1, MKNK2, MMP14, MMP2, MTDH, MTRR, MYLK, NAMPT, NFATC1, NOD2, NOTCH3, NPW, NR4A1, NR4A2, NUPR1, OSM, PACSIN2, PARD3, PDE7A, PLA2G4A, PLAU, PLAUR, PTBP1, PTGER4, PTGS2, PTPN22, PTPRJ, PXN, RASSF1, RELA, RGL2, RIPK2, RIPK3, RNF19A, RUNX3, SERPINE1, SH2B3, SH2D2A, SIGLEC7, SLC16A3, SPHK1, TFPI2, TGFBI, THBS1, TIMP1, TLN1, TNFAIP6, TNFRSF10B, TNFRSF10D, TNNC1, TRAF6, TTLL4, VASH1, VASP, VEGFC, YBX1, ZC3H12A, ZHX2, ZNRD1

Enrichment results for cluster 45:
- GSEA has 315 enriched term(s)
- ORA has 1470 enriched term(s)
- 8 terms are enriched in both methods

ID Description p.adjust_ORA p.adjust_GSEA qvalue_ORA GeneRatio
umls:C0036421 Systemic Scleroderma 0.0000000 0.0633648 0.0000000 24/107
umls:C0008626 Congenital chromosomal disease 0.0000000 0.0634023 0.0000000 22/107
umls:C0346429 Multiple malignancy 0.0000000 0.0655068 0.0000000 18/107
umls:C0206659 Embryonal Carcinoma 0.0000000 0.0661103 0.0000000 15/107
umls:C0278504 Non-small cell lung cancer stage I 0.0000000 0.0667783 0.0000000 15/107
umls:C0238288 Muscular Dystrophy, Facioscapulohumeral 0.0000000 0.0667991 0.0000000 16/107
umls:C1449563 Cardiomyopathy, Familial Idiopathic 0.0014829 0.0655606 0.0001392 10/107
umls:C0000786 Spontaneous abortion 0.0515450 0.0654930 0.0036295 9/107

Genes involved in top 5 enriched terms: ACD, ACKR3, ANTXR1, AR, CD163, CLIC1, CYBRD1, EPHX1, GSTO2, GSTP1, HFE, HIST1H4A, HIST1H4B, HIST1H4C, HIST1H4D, HIST1H4E, HIST1H4F, HIST1H4H, HIST1H4I, HIST1H4J, HIST1H4K, HIST1H4L, HIST4H4, HLA-DQB2, HLA-DRB5, HSPB7, ICAM1, IL1R1, IRF1, KRT18, MYBPC3, SERPINF1, TIMP3, TNFAIP3, TNFRSF4, TNFSF4, WNT3, YAP1

Genes involved in all enriched terms: ACD, ACKR3, ANTXR1, AR, CD163, CLIC1, CYBRD1, EPHX1, GSTO2, GSTP1, HFE, HIST1H4A, HIST1H4B, HIST1H4C, HIST1H4D, HIST1H4E, HIST1H4F, HIST1H4H, HIST1H4I, HIST1H4J, HIST1H4K, HIST1H4L, HIST4H4, HLA-DQB2, HLA-DRB5, HSPB7, ICAM1, IL1R1, IRF1, KRT18, MYBPC3, SERPINF1, TIMP3, TNFAIP3, TNFRSF4, TNFSF4, WNT3, YAP1

Plots of the results when there are more than 5 terms in common between methods:

# p-value scatter plots and gene-overlap plots for the shared 'DGN' terms
plot_results(top_modules_enrichment, top_modules_mtcor, 'DGN')
plot_shared_genes(top_modules_enrichment, top_modules_mtcor, 'DGN')


KEGG

# Print the GSEA vs ORA comparison for the 'KEGG' results of the modules in top_modules_mtcor
compare_methods(GSEA_enrichment, ORA_enrichment, top_modules_enrichment, top_modules_mtcor, 'KEGG')

Enrichment results for cluster 20:
- GSEA has 44 enriched term(s)
- ORA has 268 enriched term(s)
- 0 terms are enriched in both methods

Enrichment results for cluster 36:
- GSEA has 66 enriched term(s)
- ORA has 272 enriched term(s)
- 10 terms are enriched in both methods

ID Description p.adjust_ORA p.adjust_GSEA qvalue_ORA GeneRatio
hsa04613 Neutrophil extracellular trap formation 0.0008962 0.0051567 0.0002763 21/272
hsa04060 Cytokine-cytokine receptor interaction 0.0019268 0.0051355 0.0003895 21/272
hsa05322 Systemic lupus erythematosus 0.0021056 0.0053556 0.0003895 16/272
hsa05202 Transcriptional misregulation in cancer 0.0029734 0.0051574 0.0004584 20/272
hsa04668 TNF signaling pathway 0.0002317 0.0107648 0.0001072 17/272
hsa04621 NOD-like receptor signaling pathway 0.0133683 0.0051923 0.0017664 18/272
hsa05034 Alcoholism 0.0392455 0.0051572 0.0045374 18/272
hsa04061 Viral protein interaction with cytokine and cytokine receptor 0.0647984 0.0056390 0.0061440 9/272
hsa04064 NF-kappa B signaling pathway 0.0733941 0.0054350 0.0061713 12/272
hsa05131 Shigellosis 0.0664269 0.0149450 0.0061440 22/272

Genes involved in top 5 enriched terms: ANTXR2, BIRC3, C5AR1, CASP4, CCL2, CDKN1A, CLCF1, CTF1, CXCL1, CXCL14, CXCL2, DDX5, EYA1, FOSB, GBP1, GBP2, GHR, HAT1, HIST1H2BC, HIST1H2BD, HIST1H2BE, HIST1H2BF, HIST1H2BG, HIST1H2BI, HIST1H3A, HIST1H3B, HIST1H3C, HIST1H3D, HIST1H3E, HIST1H3F, HIST1H3H, HIST1H3I, HIST1H3J, IFNGR2, IKBKE, IL1RN, IL6, IL7, IL7R, IL8, INHBA, LIF, LTBR, MAP3K7, NAMPT, NOD2, NUPR1, OSM, PAX8, PLAU, RELA, RIPK2, RIPK3, TNFRSF10B, TNFRSF10D, TNFRSF12A, TNFSF8, TRAF6

Genes involved in all enriched terms: ANTXR2, BIRC3, C5AR1, CASP4, CCL2, CDKN1A, CFLAR, CLCF1, CTF1, CXCL1, CXCL14, CXCL2, DDX5, ERC1, EYA1, FNBP1L, FOS, FOSB, GBP1, GBP2, GHR, HAT1, HIST1H2BC, HIST1H2BD, HIST1H2BE, HIST1H2BF, HIST1H2BG, HIST1H2BI, HIST1H3A, HIST1H3B, HIST1H3C, HIST1H3D, HIST1H3E, HIST1H3F, HIST1H3H, HIST1H3I, HIST1H3J, IFNGR2, IKBKE, IL1RN, IL6, IL7, IL7R, IL8, INHBA, ITGA5, JUNB, LIF, LTBR, MAP3K7, MLKL, MMP14, NAMPT, NOD2, NUPR1, OSM, PAX8, PLAU, PLCE1, PTGS2, PXN, RELA, RIPK2, RIPK3, TLN1, TNFRSF10B, TNFRSF10D, TNFRSF12A, TNFSF8, TRAF6, U2AF1, VEGFC

Enrichment results for cluster 45:
- GSEA has 64 enriched term(s)
- ORA has 131 enriched term(s)
- 4 terms are enriched in both methods

ID Description p.adjust_ORA p.adjust_GSEA qvalue_ORA GeneRatio
hsa05203 Viral carcinogenesis 2e-07 0.0062888 0 14/63
hsa05034 Alcoholism 0e+00 0.0063023 0 14/63
hsa04613 Neutrophil extracellular trap formation 0e+00 0.0063053 0 15/63
hsa05322 Systemic lupus erythematosus 0e+00 0.0063675 0 15/63


Plots of the results when there are more than 5 terms in common between methods:

# Call the two plotting helpers on the 'KEGG' results for the modules in top_modules_mtcor.
# NOTE(review): plot_results() and plot_shared_genes() are defined outside this section;
# the accompanying text says plots are shown only when more than 5 terms are shared
# between methods -- confirm against their definitions.
plot_results(top_modules_enrichment, top_modules_mtcor, 'KEGG')
plot_shared_genes(top_modules_enrichment, top_modules_mtcor, 'KEGG')


Reactome

# Print, for every module in top_modules_mtcor, how many Reactome terms GSEA and
# ORA report and which terms are enriched in both methods.
compare_methods(GSEA_list = GSEA_enrichment,
                ORA_list = ORA_enrichment,
                top_modules_enrichment = top_modules_enrichment,
                top_modules = top_modules_mtcor,
                database = 'Reactome')

Enrichment results for cluster 20:
- GSEA has 89 enriched term(s)
- ORA has 729 enriched term(s)
- 0 terms are enriched in both methods

Enrichment results for cluster 36:
- GSEA has 197 enriched term(s)
- ORA has 888 enriched term(s)
- 50 terms are enriched in both methods

ID Description p.adjust_ORA p.adjust_GSEA qvalue_ORA GeneRatio
R-HSA-449147 Signaling by Interleukins 0.0000124 0.0190427 0.0000005 39/332
R-HSA-9006931 Signaling by Nuclear Receptors 0.0002917 0.0201304 0.0000068 27/332
R-HSA-8878171 Transcriptional regulation by RUNX1 0.0000998 0.0203808 0.0000030 26/332
R-HSA-8939211 ESR-mediated signaling 0.0000310 0.0205241 0.0000011 26/332
R-HSA-2559583 Cellular Senescence 0.0000557 0.0207968 0.0000018 24/332
R-HSA-9018519 Estrogen-dependent gene expression 0.0000062 0.0213593 0.0000003 22/332
R-HSA-68875 Mitotic Prophase 0.0000189 0.0214390 0.0000007 21/332
R-HSA-3214847 HATs acetylate histones 0.0001080 0.0214177 0.0000031 20/332
R-HSA-211000 Gene Silencing by RNA 0.0000071 0.0215555 0.0000004 21/332
R-HSA-1474165 Reproduction 0.0000674 0.0216157 0.0000021 19/332
R-HSA-8939236 RUNX1 regulates transcription of genes involved in differentiation of HSCs 0.0000106 0.0216828 0.0000005 20/332
R-HSA-1500620 Meiosis 0.0000423 0.0218768 0.0000014 18/332
R-HSA-2559580 Oxidative Stress Induced Senescence 0.0002046 0.0217152 0.0000051 18/332
R-HSA-2559582 Senescence-Associated Secretory Phenotype (SASP) 0.0000000 0.0219327 0.0000000 22/332
R-HSA-1912422 Pre-NOTCH Expression and Processing 0.0000050 0.0219327 0.0000003 19/332
R-HSA-5578749 Transcriptional regulation by small RNAs 0.0000030 0.0219759 0.0000002 19/332
R-HSA-427413 NoRC negatively regulates rRNA expression 0.0001142 0.0219759 0.0000031 17/332
R-HSA-5250913 Positive epigenetic regulation of rRNA expression 0.0001142 0.0219759 0.0000031 17/332
R-HSA-977225 Amyloid fiber formation 0.0000048 0.0221053 0.0000003 18/332
R-HSA-5250941 Negative epigenetic regulation of rRNA expression 0.0001783 0.0219327 0.0000047 17/332
R-HSA-73864 RNA Polymerase I Transcription 0.0002377 0.0218768 0.0000057 17/332
R-HSA-73854 RNA Polymerase I Promoter Clearance 0.0002061 0.0219158 0.0000051 17/332
R-HSA-3214815 HDACs deacetylate histones 0.0000189 0.0222123 0.0000007 17/332
R-HSA-1912408 Pre-NOTCH Transcription and Translation 0.0000019 0.0222440 0.0000002 18/332
R-HSA-8936459 RUNX1 regulates genes involved in megakaryocyte differentiation and platelet function 0.0000019 0.0222440 0.0000002 18/332
R-HSA-5625740 RHO GTPases activate PKNs 0.0000132 0.0222440 0.0000005 17/332
R-HSA-73772 RNA Polymerase I Promoter Escape 0.0000091 0.0222776 0.0000004 17/332
R-HSA-5250924 B-WICH complex positively regulates rRNA expression 0.0000091 0.0222776 0.0000004 17/332
R-HSA-201722 Formation of the beta-catenin:TCF transactivating complex 0.0000062 0.0223110 0.0000003 17/332
R-HSA-912446 Meiotic recombination 0.0000140 0.0223942 0.0000005 16/332
R-HSA-427389 ERCC6 (CSB) and EHMT2 (G9a) positively regulate rRNA expression 0.0000032 0.0225464 0.0000002 16/332
R-HSA-2299718 Condensation of Prophase Chromosomes 0.0000016 0.0226604 0.0000002 16/332
R-HSA-212300 PRC2 methylates histones and DNA 0.0000016 0.0226604 0.0000002 16/332
R-HSA-427359 SIRT1 negatively regulates rRNA expression 0.0000005 0.0227512 0.0000001 16/332
R-HSA-5625886 Activated PKN1 stimulates transcription of AR (androgen receptor) regulated genes KLK2 and KLK3 0.0000002 0.0228607 0.0000000 16/332
R-HSA-5334118 DNA methylation 0.0000001 0.0229205 0.0000000 16/332
R-HSA-73728 RNA Polymerase I Promoter Opening 0.0000001 0.0229205 0.0000000 16/332
R-HSA-5617472 Activation of anterior HOX genes in hindbrain development during early embryogenesis 0.0012447 0.0218768 0.0000277 16/332
R-HSA-5619507 Activation of HOX genes during differentiation 0.0012447 0.0218768 0.0000277 16/332
R-HSA-1266695 Interleukin-7 signaling 0.0000009 0.0235953 0.0000001 12/332
R-HSA-3214842 HDMs demethylate histones 0.0014118 0.0230764 0.0000307 11/332
R-HSA-212165 Epigenetic regulation of gene expression 0.0034402 0.0213101 0.0000713 18/332
R-HSA-6783783 Interleukin-10 signaling 0.0024551 0.0235745 0.0000521 9/332
R-HSA-157118 Signaling by NOTCH 0.0073706 0.0203308 0.0001493 23/332
R-HSA-6785807 Interleukin-4 and Interleukin-13 signaling 0.0172716 0.0222403 0.0003347 13/332
R-HSA-1474228 Degradation of the extracellular matrix 0.0254273 0.0216776 0.0004823 15/332
R-HSA-3214841 PKMTs methylate histone lysines 0.0366716 0.0225905 0.0006810 11/332
R-HSA-201681 TCF dependent signaling in response to WNT 0.0414055 0.0204004 0.0007533 21/332
R-HSA-3247509 Chromatin modifying enzymes 0.0957696 0.0199961 0.0016417 23/332
R-HSA-4839726 Chromatin organization 0.0957696 0.0199961 0.0016417 23/332

Genes involved in top 5 enriched terms: AGO4, AXIN1, B4GALT1, BRPF1, CCL2, CDKN1A, CLCF1, CTF1, CXCL1, CXCL2, DDX5, DHRS4, DHRS4L2, DTX2, FOS, FOSB, HAT1, HBEGF, HEYL, HIST1H2BC, HIST1H2BD, HIST1H2BE, HIST1H2BF, HIST1H2BG, HIST1H2BI, HIST1H3A, HIST1H3B, HIST1H3C, HIST1H3D, HIST1H3E, HIST1H3F, HIST1H3H, HIST1H3I, HIST1H3J, HSPB1, IL1RN, IL6, IL7, IL7R, IL8, JUNB, KAT6A, KDM1B, LAMA5, LIF, MAP3K7, MAPK7, MBIP, MCL1, MED1, MMP2, NOD2, NOTCH3, OSM, PELI1, PSMC1, PTGS2, PTPN13, RCOR1, RELA, RIPK2, SPHK1, TIMP1, TRAF6, YBX1

Genes involved in all enriched terms: ABL1, ADAM8, AGO4, AXIN1, B4GALT1, BMP1, BRPF1, CAPN12, CCL2, CDKN1A, CLCF1, COL11A1, COL4A1, COL4A2, COL8A2, CTF1, CTSK, CTSL, CXCL1, CXCL2, DDX5, DHRS4, DHRS4L2, DIDO1, DTX2, ELF1, FOS, FOSB, GSN, HAT1, HBEGF, HEYL, HIST1H1D, HIST1H2BC, HIST1H2BD, HIST1H2BE, HIST1H2BF, HIST1H2BG, HIST1H2BI, HIST1H3A, HIST1H3B, HIST1H3C, HIST1H3D, HIST1H3E, HIST1H3F, HIST1H3H, HIST1H3I, HIST1H3J, HSPB1, IL1RN, IL6, IL7, IL7R, IL8, JUNB, KAT6A, KDM1B, LAMA5, LAMC1, LEMD3, LIF, LRP5, MAP3K7, MAPK7, MBIP, MCL1, MED1, MMP14, MMP2, MYH9, NOD2, NOTCH3, NUMA1, NUP188, NUP98, OSM, PELI1, PHYKPL, PSMC1, PTGS2, PTPN13, RANBP2, RCOR1, RELA, RIPK2, RUNX3, SOX6, SPHK1, SYNE2, TAL1, TCF12, TDRD1, TET2, TGFBI, THBS1, TIMP1, TRAF6, YBX1, ZNRD1

Enrichment results for cluster 45:
- GSEA has 223 enriched term(s)
- ORA has 309 enriched term(s)
- 70 terms are enriched in both methods

ID Description p.adjust_ORA p.adjust_GSEA qvalue_ORA GeneRatio
R-HSA-157118 Signaling by NOTCH 1.8e-06 0.0252188 0e+00 14/78
R-HSA-3247509 Chromatin modifying enzymes 1.7e-06 0.0252212 0e+00 15/78
R-HSA-4839726 Chromatin organization 1.7e-06 0.0252212 0e+00 15/78
R-HSA-8878171 Transcriptional regulation by RUNX1 1.0e-07 0.0252716 0e+00 15/78
R-HSA-9006931 Signaling by Nuclear Receptors 5.8e-06 0.0252883 1e-07 14/78
R-HSA-201681 TCF dependent signaling in response to WNT 0.0e+00 0.0253050 0e+00 16/78
R-HSA-8939211 ESR-mediated signaling 6.0e-07 0.0253841 0e+00 14/78
R-HSA-2559583 Cellular Senescence 0.0e+00 0.0255151 0e+00 17/78
R-HSA-9018519 Estrogen-dependent gene expression 0.0e+00 0.0255433 0e+00 14/78
R-HSA-212165 Epigenetic regulation of gene expression 0.0e+00 0.0255632 0e+00 14/78
R-HSA-3214847 HATs acetylate histones 0.0e+00 0.0256190 0e+00 14/78
R-HSA-73854 RNA Polymerase I Promoter Clearance 0.0e+00 0.0256302 0e+00 14/78
R-HSA-211000 Gene Silencing by RNA 0.0e+00 0.0256376 0e+00 15/78
R-HSA-1500620 Meiosis 0.0e+00 0.0256381 0e+00 16/78
R-HSA-73864 RNA Polymerase I Transcription 0.0e+00 0.0256381 0e+00 14/78
R-HSA-5617472 Activation of anterior HOX genes in hindbrain development during early embryogenesis 0.0e+00 0.0256381 0e+00 14/78
R-HSA-5619507 Activation of HOX genes during differentiation 0.0e+00 0.0256381 0e+00 14/78
R-HSA-68875 Mitotic Prophase 0.0e+00 0.0256430 0e+00 15/78
R-HSA-8939236 RUNX1 regulates transcription of genes involved in differentiation of HSCs 0.0e+00 0.0256513 0e+00 15/78
R-HSA-2559580 Oxidative Stress Induced Senescence 0.0e+00 0.0256562 0e+00 15/78
R-HSA-1474165 Reproduction 0.0e+00 0.0256577 0e+00 17/78
R-HSA-1912422 Pre-NOTCH Expression and Processing 0.0e+00 0.0256602 0e+00 14/78
R-HSA-5250941 Negative epigenetic regulation of rRNA expression 0.0e+00 0.0256602 0e+00 14/78
R-HSA-2559582 Senescence-Associated Secretory Phenotype (SASP) 0.0e+00 0.0256602 0e+00 14/78
R-HSA-73886 Chromosome Maintenance 0.0e+00 0.0257005 0e+00 16/78
R-HSA-427413 NoRC negatively regulates rRNA expression 0.0e+00 0.0257060 0e+00 14/78
R-HSA-5250913 Positive epigenetic regulation of rRNA expression 0.0e+00 0.0257060 0e+00 14/78
R-HSA-5578749 Transcriptional regulation by small RNAs 0.0e+00 0.0257060 0e+00 14/78
R-HSA-5693607 Processing of DNA double-strand break ends 0.0e+00 0.0257119 0e+00 14/78
R-HSA-3214815 HDACs deacetylate histones 0.0e+00 0.0257415 0e+00 14/78
R-HSA-977225 Amyloid fiber formation 0.0e+00 0.0257638 0e+00 14/78
R-HSA-69473 G2/M DNA damage checkpoint 0.0e+00 0.0257638 0e+00 14/78
R-HSA-5625740 RHO GTPases activate PKNs 0.0e+00 0.0258239 0e+00 15/78
R-HSA-1912408 Pre-NOTCH Transcription and Translation 0.0e+00 0.0258239 0e+00 14/78
R-HSA-8936459 RUNX1 regulates genes involved in megakaryocyte differentiation and platelet function 0.0e+00 0.0258239 0e+00 14/78
R-HSA-73884 Base Excision Repair 0.0e+00 0.0258962 0e+00 16/78
R-HSA-73772 RNA Polymerase I Promoter Escape 0.0e+00 0.0258962 0e+00 14/78
R-HSA-5250924 B-WICH complex positively regulates rRNA expression 0.0e+00 0.0258962 0e+00 14/78
R-HSA-201722 Formation of the beta-catenin:TCF transactivating complex 0.0e+00 0.0259358 0e+00 14/78
R-HSA-157579 Telomere Maintenance 0.0e+00 0.0259866 0e+00 16/78
R-HSA-5693606 DNA Double Strand Break Response 0.0e+00 0.0259866 0e+00 14/78
R-HSA-3214858 RMTs methylate histone arginines 0.0e+00 0.0259866 0e+00 14/78
R-HSA-912446 Meiotic recombination 0.0e+00 0.0260038 0e+00 14/78
R-HSA-5693565 Recruitment and ATM-mediated phosphorylation of repair and signaling proteins at DNA double strand breaks 0.0e+00 0.0260058 0e+00 14/78
R-HSA-2299718 Condensation of Prophase Chromosomes 0.0e+00 0.0260245 0e+00 14/78
R-HSA-212300 PRC2 methylates histones and DNA 0.0e+00 0.0260245 0e+00 14/78
R-HSA-1221632 Meiotic synapsis 0.0e+00 0.0260336 0e+00 16/78
R-HSA-3214841 PKMTs methylate histone lysines 0.0e+00 0.0260391 0e+00 14/78
R-HSA-2559586 DNA Damage/Telomere Stress Induced Senescence 0.0e+00 0.0260437 0e+00 16/78
R-HSA-4551638 SUMOylation of chromatin organization proteins 0.0e+00 0.0260508 0e+00 14/78
R-HSA-427389 ERCC6 (CSB) and EHMT2 (G9a) positively regulate rRNA expression 0.0e+00 0.0260508 0e+00 14/78
R-HSA-427359 SIRT1 negatively regulates rRNA expression 0.0e+00 0.0260985 0e+00 14/78
R-HSA-5693571 Nonhomologous End-Joining (NHEJ) 0.0e+00 0.0261066 0e+00 14/78
R-HSA-606279 Deposition of new CENPA-containing nucleosomes at the centromere 0.0e+00 0.0261336 0e+00 14/78
R-HSA-774815 Nucleosome assembly 0.0e+00 0.0261336 0e+00 14/78
R-HSA-73929 Base-Excision Repair, AP Site Formation 0.0e+00 0.0261387 0e+00 16/78
R-HSA-5625886 Activated PKN1 stimulates transcription of AR (androgen receptor) regulated genes KLK2 and KLK3 0.0e+00 0.0261591 0e+00 15/78
R-HSA-5334118 DNA methylation 0.0e+00 0.0261663 0e+00 14/78
R-HSA-73728 RNA Polymerase I Promoter Opening 0.0e+00 0.0261663 0e+00 14/78
R-HSA-110328 Recognition and association of DNA glycosylase with site containing an affected pyrimidine 0.0e+00 0.0261928 0e+00 16/78
R-HSA-110329 Cleavage of the damaged pyrimidine 0.0e+00 0.0261928 0e+00 16/78
R-HSA-73928 Depyrimidination 0.0e+00 0.0261928 0e+00 16/78
R-HSA-110330 Recognition and association of DNA glycosylase with site containing an affected purine 0.0e+00 0.0262010 0e+00 16/78
R-HSA-110331 Cleavage of the damaged purine 0.0e+00 0.0262010 0e+00 16/78
R-HSA-73927 Depurination 0.0e+00 0.0262010 0e+00 16/78
R-HSA-3214842 HDMs demethylate histones 0.0e+00 0.0262323 0e+00 14/78
R-HSA-171306 Packaging Of Telomere Ends 0.0e+00 0.0262493 0e+00 16/78
R-HSA-3108232 SUMO E3 ligases SUMOylate target proteins 0.0e+00 0.0509826 0e+00 15/78
R-HSA-69620 Cell Cycle Checkpoints 3.0e-05 0.0572515 3e-07 14/78
R-HSA-5693567 HDR through Homologous Recombination (HRR) or Single Strand Annealing (SSA) 0.0e+00 0.0768981 0e+00 14/78

Genes involved in top 5 enriched terms: HIST1H4A, HIST1H4B, HIST1H4C, HIST1H4D, HIST1H4E, HIST1H4F, HIST1H4H, HIST1H4I, HIST1H4J, HIST1H4K, HIST1H4L, HIST4H4, PADI4, YAP1

Genes involved in all enriched terms: ACD, AR, FZD8, HIST1H4A, HIST1H4B, HIST1H4C, HIST1H4D, HIST1H4E, HIST1H4F, HIST1H4H, HIST1H4I, HIST1H4J, HIST1H4K, HIST1H4L, HIST4H4, HVCN1, MAP3K5, NEK9, PADI4, PLD6, TINF2, WNT3, YAP1

Plots of the results when there are more than 5 terms in common between methods:

# Call the two plotting helpers on the 'Reactome' results for the modules in top_modules_mtcor.
# NOTE(review): plot_results() and plot_shared_genes() are defined outside this section;
# the accompanying text says plots are shown only when more than 5 terms are shared
# between methods -- confirm against their definitions.
plot_results(top_modules_enrichment, top_modules_mtcor, 'Reactome')
plot_shared_genes(top_modules_enrichment, top_modules_mtcor, 'Reactome')





3.4.2 Top clusters by enrichment in SFARI Genes


# Elements 4 to 6 of top_modules are the clusters analysed in this section
# (presumably the top clusters by enrichment in SFARI Genes, per the section
# heading -- the construction of top_modules is not shown here).
top_modules_SFARI <- top_modules[4:6]


Gene Ontology

# Print, for every module in top_modules_SFARI, how many GO terms GSEA and ORA
# report and which terms are enriched in both methods.
compare_methods(GSEA_list = GSEA_enrichment,
                ORA_list = ORA_enrichment,
                top_modules_enrichment = top_modules_enrichment,
                top_modules = top_modules_SFARI,
                database = 'GO')

Enrichment results for cluster 7:
- GSEA has 137 enriched term(s)
- ORA has 104 enriched term(s)
- 21 terms are enriched in both methods

ID Description p.adjust_ORA p.adjust_GSEA qvalue_ORA GeneRatio
GO:0034762 regulation of transmembrane transport 0.0000000 0.0697816 0.0000000 92/1284
GO:0015672 monovalent inorganic cation transport 0.0000001 0.0711775 0.0000000 82/1284
GO:0034765 regulation of ion transmembrane transport 0.0000000 0.0711836 0.0000000 85/1284
GO:0042391 regulation of membrane potential 0.0006281 0.0732093 0.0000399 62/1284
GO:0050804 modulation of chemical synaptic transmission 0.0021823 0.0721148 0.0001200 65/1284
GO:0099177 regulation of trans-synaptic signaling 0.0024087 0.0720863 0.0001251 65/1284
GO:1904062 regulation of cation transmembrane transport 0.0005109 0.0752138 0.0000367 55/1284
GO:0050890 cognition 0.0001711 0.0761199 0.0000160 53/1284
GO:0007611 learning or memory 0.0000990 0.0776834 0.0000116 49/1284
GO:0032412 regulation of ion transmembrane transporter activity 0.0006399 0.0783061 0.0000399 45/1284
GO:0022898 regulation of transmembrane transporter activity 0.0016952 0.0780232 0.0000990 45/1284
GO:0032409 regulation of transporter activity 0.0028050 0.0775511 0.0001380 46/1284
GO:0006813 potassium ion transport 0.0000003 0.0808708 0.0000001 45/1284
GO:2001257 regulation of cation channel activity 0.0003698 0.0822741 0.0000288 36/1284
GO:0071804 cellular potassium ion transport 0.0000117 0.0847120 0.0000016 34/1284
GO:0071805 potassium ion transmembrane transport 0.0000117 0.0847120 0.0000016 34/1284
GO:0023061 signal release 0.0169944 0.0719902 0.0006619 63/1284
GO:0050808 synapse organization 0.0169944 0.0719902 0.0006619 63/1284
GO:0035637 multicellular organismal signaling 0.0222799 0.0804569 0.0008010 36/1284
GO:0048667 cell morphogenesis involved in neuron differentiation 0.0366082 0.0698008 0.0011799 73/1284
GO:0007612 learning 0.0938417 0.0834048 0.0024365 29/1284

Genes involved in top 5 enriched terms: ABCC8, ACTN2, ADCYAP1, ANK2, ASIC2, ASIC3, ATP1A1, ATP1B1, ATP2A1, ATP5G1, ATP6V1A, ATP6V1B2, ATP6V1C1, ATP6V1D, ATP6V1E1, ATP6V1G2, ATP6V1H, B4GALT6, BCL11B, BHLHB9, BLOC1S6, BMP8A, BRAF, C2CD5, CACNA2D3, CACNB4, CACNG5, CADPS, CALM1, CALM2, CALM3, CAPN3, CARTPT, CASQ1, CCK, CD38, CDKL5, CHL1, CHN1, CLCN2, CLOCK, CNIH3, CNTN4, COL25A1, COX4I1, CRH, CRHBP, CTNNA2, CYB5R4, DCC, DCLK1, DGKI, DHX36, DLG3, DNM1L, DNM3, DPP10, DYSF, EFCAB4B, EFNA5, EPHA4, EPHA5, EPHB3, EXOC3L1, FAM115A, FBXO45, FFAR4, FGF12, FGF13, FGF14, FGF8, FGFR2, FLOT1, FLRT3, FZD3, GAD1, GAD2, GAL, GFRA2, GOPC, GPD1L, GPLD1, GPRASP2, GRIA2, GRIA3, GRIN2A, GRIN3A, GSTM2, HCN1, HPRT1, HTR2A, ICA1, ISCU, KALRN, KCNA1, KCNA2, KCNA6, KCNAB1, KCNAB3, KCNB1, KCNB2, KCNC2, KCNE2, KCNF1, KCNH1, KCNH5, KCNH8, KCNIP1, KCNIP2, KCNIP4, KCNJ1, KCNJ3, KCNJ4, KCNJ5, KCNJ6, KCNJ9, KCNMA1, KCNQ3, KCNQ5, KCNRG, KCNS1, KCNS3, KCNV1, KEL, LIN7A, LRRC38, LRRC4C, LRRC7, LRRK2, MAGED2, MAP2, MAP6, MCF2, MEF2C, MYO5A, MYO7A, MYRIP, NALCN, NDEL1, NEFH, NGEF, NMU, NOS2, NRXN3, NSF, NTN4, OLFM1, OPA1, OPRK1, OSTN, OXCT1, PAFAH1B1, PAK1, PAK3, PARK2, PFN2, PIK3CA, PIK3CB, PIK3CD, PPARGC1A, PPP1R9A, PPP3CA, PPP3CB, PREPL, PRKCB, PRKCE, PRKCI, PTK2B, PTPN3, PTPRD, RASGRF1, RBFOX2, RGS4, RHOT1, ROBO2, RPS6KA5, RUFY3, RYR2, SCG5, SCN1A, SCN1B, SCN2A, SCN5A, SCN8A, SCNN1D, SEMA3B, SEMA6A, SESTD1, SIRT4, SIRT6, SLC15A5, SLC17A7, SLC17A8, SLC24A2, SLC25A4, SLC38A1, SLC4A10, SLC5A11, SLC5A8, SLC8A1, SLC8A3, SLC9A5, SLC9B2, SLIT2, SNAP25, SPHK2, SREBF1, STMN1, STXBP5L, SYNJ1, SYP, SYT1, SYT12, SYT4, SZT2, TAC1, TACR1, TBR1, TMEM27, TOP2B, TRPC1, TRPC5, UCHL1, UNC5C, USP33, VIP, VSNL1, YWHAH, ZNF365

Genes involved in all enriched terms: ABCA7, ABCC8, ACTN2, ADCYAP1, ADD2, ANK2, ASIC2, ASIC3, ATAD1, ATP1A1, ATP1B1, ATP2A1, ATP2B1, ATP5G1, ATP6V1A, ATP6V1B2, ATP6V1C1, ATP6V1D, ATP6V1E1, ATP6V1G2, ATP6V1H, B4GALT6, BCL11B, BHLHB9, BID, BLOC1S6, BMP8A, BRAF, C2CD5, CA7, CACNA2D3, CACNB4, CACNG5, CADPS, CALB1, CALM1, CALM2, CALM3, CAMK4, CAPN3, CARTPT, CASQ1, CCK, CD38, CDKL5, CELF4, CHL1, CHN1, CHRM1, CHRM5, CHRNA1, CLCN2, CLOCK, CNIH3, CNTN4, CNTNAP2, COL25A1, COL4A5, CORIN, COX4I1, CPEB3, CRH, CRHBP, CTNNA2, CYB5R4, DCC, DCLK1, DDHD2, DGKB, DGKE, DGKI, DHX36, DLG3, DNM1L, DNM3, DPP10, DRP2, DYSF, EFCAB4B, EFNA5, EPHA4, EPHA5, EPHB3, ERBB4, EXOC3L1, FAM115A, FBXO45, FFAR4, FGF12, FGF13, FGF14, FGF8, FGFR2, FLOT1, FLRT3, FPGT-TNNI3K, FRMPD4, FZD3, FZD5, GABRA1, GABRB2, GABRD, GABRG2, GAD1, GAD2, GAL, GFRA2, GJD2, GLRB, GOPC, GOT1, GPD1L, GPHN, GPLD1, GPRASP2, GRIA2, GRIA3, GRIK1, GRIN2A, GRIN3A, GSTM2, HCN1, HPRT1, HTR2A, ICA1, INA, ISCU, JAKMIP1, KALRN, KAT2A, KCNA1, KCNA2, KCNA6, KCNAB1, KCNAB3, KCNB1, KCNB2, KCNC2, KCNE2, KCNF1, KCNH1, KCNH5, KCNH8, KCNIP1, KCNIP2, KCNIP4, KCNJ1, KCNJ3, KCNJ4, KCNJ5, KCNJ6, KCNJ9, KCNMA1, KCNQ3, KCNQ5, KCNRG, KCNS1, KCNS3, KCNV1, KEL, KIT, KRAS, LIN7A, LRFN5, LRRC38, LRRC4C, LRRC7, LRRK2, LRRN3, LRRTM2, LRRTM3, MAGED2, MAP2, MAP6, MCF2, MEF2C, MEIS2, MLLT11, MME, MYO5A, MYO7A, MYRIP, NALCN, NDEL1, NEFH, NGEF, NLGN4Y, NMU, NOS2, NPTN, NPY2R, NRG1, NRGN, NRXN3, NSF, NSG1, NTN4, OLFM1, OPA1, OPRK1, OSTN, OXCT1, P2RX6, PAFAH1B1, PAK1, PAK3, PAK7, PARK2, PCDH8, PCDHB10, PCDHB11, PCDHB14, PFN2, PIAS1, PICK1, PIK3CA, PIK3CB, PIK3CD, PJA2, PLCB1, PLK2, PPARGC1A, PPP1R9A, PPP3CA, PPP3CB, PREPL, PRKAR2B, PRKCB, PRKCE, PRKCI, PTK2B, PTPN3, PTPRD, RAB39B, RAB3B, RASGRF1, RBFOX2, RGS4, RGS7BP, RHOT1, ROBO2, RPS6KA5, RUFY3, RYR2, SCG5, SCN1A, SCN1B, SCN2A, SCN5A, SCN8A, SCNN1D, SEMA3B, SEMA6A, SESTD1, SGK2, SIRT4, SIRT6, SIX4, SLC15A5, SLC17A7, SLC17A8, SLC24A2, SLC25A36, SLC25A4, SLC38A1, SLC4A10, SLC5A11, SLC5A8, SLC8A1, SLC8A3, SLC9A5, 
SLC9B2, SLIT2, SLITRK4, SNAP25, SNCB, SPHK2, SREBF1, STMN1, STXBP5L, SYNJ1, SYP, SYT1, SYT12, SYT4, SZT2, TAC1, TACR1, TBR1, TMEM27, TNNI3K, TOP2B, TOR1A, TRPC1, TRPC5, TUSC3, UBE2V1, UBE2V2, UCHL1, UNC5C, USP33, VIP, VSNL1, WASF1, YWHAG, YWHAH, YWHAZ, ZC4H2, ZNF365

Enrichment results for cluster 22:
- GSEA has 279 enriched term(s)
- ORA has 5 enriched term(s)
- 0 terms are enriched in both methods

Enrichment results for cluster 39:
- GSEA has 173 enriched term(s)
- ORA has 8 enriched term(s)
- 3 terms are enriched in both methods

ID Description p.adjust_ORA p.adjust_GSEA qvalue_ORA GeneRatio
GO:0032386 regulation of intracellular transport 0.0382956 0.0757639 0.0270058 23/302
GO:0051648 vesicle localization 0.0565921 0.0801804 0.0270058 18/302
GO:0051650 establishment of vesicle localization 0.0897019 0.0810056 0.0285373 17/302

Genes involved in all enriched terms: ABCA2, ATP2A2, CSNK2A2, DTNBP1, DYNC1H1, EMD, FAM65A, GRIK5, GSK3B, HTT, HUWE1, IPO5, KIF1A, KIF1B, MIEF1, NLGN2, NPEPPS, PRKACA, PRKCG, PTPN14, PTPN23, RAB11B, RAB3A, SEC16A, STX1B, SYN1, TCF7L2, TMED9, TRAPPC1, TRAPPC5, VAMP2

Plots of the results when there are more than 5 terms in common between methods:

# Call the two plotting helpers on the 'GO' results for the modules in top_modules_SFARI.
# NOTE(review): plot_results() and plot_shared_genes() are defined outside this section;
# the accompanying text says plots are shown only when more than 5 terms are shared
# between methods -- confirm against their definitions.
plot_results(top_modules_enrichment, top_modules_SFARI, 'GO')
plot_shared_genes(top_modules_enrichment, top_modules_SFARI, 'GO')


Disease Ontology

# Print, for every module in top_modules_SFARI, how many DO terms GSEA and ORA
# report and which terms are enriched in both methods.
compare_methods(GSEA_list = GSEA_enrichment,
                ORA_list = ORA_enrichment,
                top_modules_enrichment = top_modules_enrichment,
                top_modules = top_modules_SFARI,
                database = 'DO')

Enrichment results for cluster 7:
- GSEA has 89 enriched term(s)
- ORA has 645 enriched term(s)
- 1 terms are enriched in both methods

ID Description p.adjust_ORA p.adjust_GSEA qvalue_ORA GeneRatio
DOID:1826 epilepsy syndrome 0.0040891 0.0106698 0.0040891 38/607

Genes involved in all enriched terms: ADAM22, CACNB4, CDKL5, CLCN2, CNTNAP2, CRH, DCX, FAM3C, FOXG1, GABBR2, GABRA1, GABRB2, GABRG2, GAD1, GAD2, GJD2, GPHN, GRIA2, GRIA3, GRIK1, GRIN2A, HCN1, HSPBAP1, KCNA1, KCNMA1, KCNQ3, LGI2, MEF2C, PNOC, PVALB, SCN1A, SCN1B, SCN2A, SCN5A, SCN8A, SERPINI1, SLIT2, SYT1

Enrichment results for cluster 22:
- GSEA has 108 enriched term(s)
- ORA has 146 enriched term(s)
- 0 terms are enriched in both methods

Enrichment results for cluster 39:
- GSEA has 10 enriched term(s)
- ORA has 467 enriched term(s)
- 0 terms are enriched in both methods


Disease Gene Network

# Print, for every module in top_modules_SFARI, how many DGN terms GSEA and ORA
# report and which terms are enriched in both methods.
compare_methods(GSEA_list = GSEA_enrichment,
                ORA_list = ORA_enrichment,
                top_modules_enrichment = top_modules_enrichment,
                top_modules = top_modules_SFARI,
                database = 'DGN')

Enrichment results for cluster 7:
- GSEA has 25 enriched term(s)
- ORA has 2893 enriched term(s)
- 2 terms are enriched in both methods

ID Description p.adjust_ORA p.adjust_GSEA qvalue_ORA GeneRatio
umls:C0035372 Rett Syndrome 0.0560114 0.0502623 0.0153481 26/1234
umls:C0004936 Mental disorders 0.0802751 0.0422549 0.0153481 63/1234

Genes involved in all enriched terms: ACE, ACTN2, ADCYAP1, ADRBK2, ANKK1, ASMT, ATP6V1B2, CACNG5, CALB1, CARTPT, CCK, CD22, CDKL5, CDR2, CHRM4, CLOCK, CNTNAP2, CORT, CRH, CRHBP, CTXN3, DLX6, EFNA5, ENO2, ERBB4, FAM69A, FGFR2, FOXG1, GABRB2, GABRD, GABRG3, GAD1, GRIA2, GRIN2A, HIST1H4A, HIST1H4B, HIST1H4C, HIST1H4D, HIST1H4E, HIST1H4F, HIST1H4H, HIST1H4I, HIST1H4J, HIST1H4K, HIST1H4L, HIST4H4, HTR2A, HTR5A, HTR7, LMO3, LRRK2, MAGI1, MAP2, MCHR2, NALCN, NOS2, NRG1, NRSN1, NRXN3, NSF, OPRK1, PPP2R2B, PPP3CC, PTPRU, RAPGEF5, RGS4, RNMT, SCN8A, SEMA6A, SGSM3, SLC25A4, SNAP25, ST8SIA2, STMN1, SULT4A1, SYT4, TAC1, TACR1, TSNAX, VIP, WASF1, WDR45, YWHAH

Enrichment results for cluster 22:
- GSEA has 140 enriched term(s)
- ORA has 711 enriched term(s)
- 0 terms are enriched in both methods

Enrichment results for cluster 39:
- GSEA has 24 enriched term(s)
- ORA has 1921 enriched term(s)
- 1 terms are enriched in both methods

ID Description p.adjust_ORA p.adjust_GSEA qvalue_ORA GeneRatio
umls:C3714756 Intellectual Disability 7.93e-05 0.0438518 7.74e-05 29/274

Genes involved in all enriched terms: ARID1A, CHAMP1, CIC, CUL4B, DYNC1H1, FASN, FMN2, GATAD2B, GNAS, HCFC1, HERC2, HUWE1, IQSEC2, KIAA1279, KIF1A, KMT2D, MECP2, MED12, PACS1, PCDH19, PRKCG, SHANK2, SMARCA4, SOBP, SPTAN1, SYNGAP1, TCF7L2, TSC1, UBR4

Plots of the results when there are more than 5 terms in common between methods:

# Call the two plotting helpers on the 'DGN' results for the modules in top_modules_SFARI.
# NOTE(review): plot_results() and plot_shared_genes() are defined outside this section;
# the accompanying text says plots are shown only when more than 5 terms are shared
# between methods -- confirm against their definitions.
plot_results(top_modules_enrichment, top_modules_SFARI, 'DGN')
plot_shared_genes(top_modules_enrichment, top_modules_SFARI, 'DGN')


KEGG

# Print, for every module in top_modules_SFARI, how many KEGG terms GSEA and ORA
# report and which terms are enriched in both methods.
compare_methods(GSEA_list = GSEA_enrichment,
                ORA_list = ORA_enrichment,
                top_modules_enrichment = top_modules_enrichment,
                top_modules = top_modules_SFARI,
                database = 'KEGG')

Enrichment results for cluster 7:
- GSEA has 48 enriched term(s)
- ORA has 307 enriched term(s)
- 11 terms are enriched in both methods

ID Description p.adjust_ORA p.adjust_GSEA qvalue_ORA GeneRatio
hsa04020 Calcium signaling pathway 0.0000932 0.0044042 0.0000361 43/563
hsa04728 Dopaminergic synapse 0.0045852 0.0047724 0.0007414 26/563
hsa04921 Oxytocin signaling pathway 0.0057410 0.0046608 0.0007414 29/563
hsa05033 Nicotine addiction 0.0056097 0.0054811 0.0007414 12/563
hsa04720 Long-term potentiation 0.0252295 0.0051540 0.0024438 16/563
hsa04724 Glutamatergic synapse 0.0287679 0.0048480 0.0024769 22/563
hsa04727 GABAergic synapse 0.0457932 0.0050211 0.0029571 18/563
hsa04360 Axon guidance 0.0021857 0.0590901 0.0005646 34/563
hsa04929 GnRH secretion 0.0346940 0.0414781 0.0026884 15/563
hsa04024 cAMP signaling pathway 0.0408887 0.0359115 0.0028804 33/563
hsa04713 Circadian entrainment 0.0820462 0.0049542 0.0048906 19/563

Genes involved in top 5 enriched terms: ADCY10, ADCYAP1, ADRA1D, ADRBK2, ATP2A1, ATP2B1, CACNA2D3, CACNB4, CACNG5, CALM1, CALM2, CALM3, CAMK1G, CAMK4, CAMKK2, CASQ1, CD38, CHRM1, CHRM3, CHRM5, CLOCK, DLGAP1, ERBB4, FGF18, FGF5, FGF8, FGF9, FGFR2, GLS, GNG3, GRIA2, GRIA3, GRIK1, GRIN2A, GRIN3A, GUCY1B3, HTR2A, HTR5A, HTR7, KCNJ3, KCNJ4, KCNJ5, KCNJ6, KCNJ9, KRAS, MAPK10, MAPK8, MAPK9, MCOLN1, MCOLN2, MEF2C, MYLK3, NOS2, P2RX6, PHKG2, PLCB1, PLD2, PPP2CA, PPP2R2B, PPP2R2D, PPP2R5E, PPP3CA, PPP3CB, PPP3CC, PPP3R1, PRKAA2, PRKACB, PRKCB, PTK2B, ROCK2, RPS6KA5, RYR2, SCN1A, SLC17A7, SLC17A8, SLC25A4, SLC38A1, SLC8A1, SLC8A3, SPHK2, TACR1, TRPC1

Genes involved in all enriched terms: ADCY10, ADCYAP1, ADRA1D, ADRBK2, ATP1A1, ATP1B1, ATP2A1, ATP2B1, BRAF, CACNA2D3, CACNB4, CACNG5, CALM1, CALM2, CALM3, CAMK1G, CAMK4, CAMKK2, CASQ1, CD38, CHRM1, CHRM3, CHRM5, CLOCK, CNGB3, CRH, DCC, DLGAP1, EFNA2, EFNA5, EPHA4, EPHA5, EPHB3, ERBB4, FGF18, FGF5, FGF8, FGF9, FGFR2, FZD3, GABARAPL1, GABBR2, GABRA1, GABRA4, GABRB2, GABRD, GABRG2, GABRG3, GAD1, GAD2, GLS, GNG3, GPHN, GRIA2, GRIA3, GRIK1, GRIN2A, GRIN3A, GUCY1B3, HCN1, HTR2A, HTR5A, HTR7, KCNJ3, KCNJ4, KCNJ5, KCNJ6, KCNJ9, KRAS, LRRC4C, MAPK10, MAPK8, MAPK9, MCOLN1, MCOLN2, MEF2C, MYLK3, NGEF, NOS2, NSF, NTN4, P2RX6, PAK1, PAK3, PAK7, PHKG2, PIK3CA, PIK3CB, PIK3CD, PLCB1, PLD2, PLXNB3, PPP2CA, PPP2R2B, PPP2R2D, PPP2R5E, PPP3CA, PPP3CB, PPP3CC, PPP3R1, PRKAA2, PRKACB, PRKCB, PTK2B, RASA1, RND1, ROBO2, ROCK2, RPS6KA5, RPS6KA6, RYR2, SCN1A, SEMA3B, SEMA6A, SLC17A7, SLC17A8, SLC25A4, SLC38A1, SLC8A1, SLC8A3, SLIT2, SPHK2, SST, TACR1, TRPC1, TRPC3, TRPC4, TRPC5, TSHR, UNC5C, VIP

Enrichment results for cluster 22:
- GSEA has 50 enriched term(s)
- ORA has 60 enriched term(s)
- 0 terms are enriched in both methods

Enrichment results for cluster 39:
- GSEA has 49 enriched term(s)
- ORA has 224 enriched term(s)
- 1 terms are enriched in both methods

ID Description p.adjust_ORA p.adjust_GSEA qvalue_ORA GeneRatio
hsa04962 Vasopressin-regulated water reabsorption 0.0535881 0.0057079 0.0496092 6/148

Genes involved in all enriched terms: ARHGDIA, DYNC1H1, GNAS, PRKACA, RAB11B, VAMP2

Plots of the results when there are more than 5 terms in common between methods:

# Call the two plotting helpers on the 'KEGG' results for the modules in top_modules_SFARI.
# NOTE(review): plot_results() and plot_shared_genes() are defined outside this section;
# the accompanying text says plots are shown only when more than 5 terms are shared
# between methods -- confirm against their definitions.
plot_results(top_modules_enrichment, top_modules_SFARI, 'KEGG')
plot_shared_genes(top_modules_enrichment, top_modules_SFARI, 'KEGG')


Reactome

# Print, for every module in top_modules_SFARI, how many Reactome terms GSEA and
# ORA report and which terms are enriched in both methods.
compare_methods(GSEA_list = GSEA_enrichment,
                ORA_list = ORA_enrichment,
                top_modules_enrichment = top_modules_enrichment,
                top_modules = top_modules_SFARI,
                database = 'Reactome')

Enrichment results for cluster 7:
- GSEA has 70 enriched term(s)
- ORA has 1118 enriched term(s)
- 15 terms are enriched in both methods

ID Description p.adjust_ORA p.adjust_GSEA qvalue_ORA GeneRatio
R-HSA-112316 Neuronal System 0.0000000 0.0165790 0.0000000 90/832
R-HSA-112315 Transmission across Chemical Synapses 0.0000011 0.0176302 0.0000003 54/832
R-HSA-112314 Neurotransmitter receptors and postsynaptic signal transmission 0.0000009 0.0183710 0.0000003 45/832
R-HSA-1296071 Potassium Channels 0.0000092 0.0201343 0.0000020 28/832
R-HSA-442755 Activation of NMDA receptors and postsynaptic events 0.0015988 0.0203691 0.0001539 23/832
R-HSA-1296072 Voltage gated Potassium channels 0.0000315 0.0220304 0.0000054 17/832
R-HSA-9620244 Long-term potentiation 0.0037844 0.0230669 0.0002899 11/832
R-HSA-438064 Post NMDA receptor activation events 0.0076408 0.0207495 0.0003893 20/832
R-HSA-5576891 Cardiac conduction 0.0095060 0.0192402 0.0004334 29/832
R-HSA-983712 Ion channel transport 0.0002076 0.0372522 0.0000257 38/832
R-HSA-397014 Muscle contraction 0.0248832 0.0183008 0.0010264 36/832
R-HSA-5576892 Phase 0 - rapid depolarisation 0.0389283 0.0217603 0.0014050 14/832
R-HSA-180024 DARPP-32 events 0.0457512 0.0230007 0.0015852 10/832
R-HSA-442982 Ras activation upon Ca2+ influx through NMDA receptor 0.0585270 0.0232663 0.0019499 9/832
R-HSA-438066 Unblocking of NMDA receptors, glutamate binding and activation 0.0909240 0.0232296 0.0027196 9/832

Genes involved in top 5 enriched terms: ABCC8, ACTC1, ACTN2, ATP1A1, ATP1B1, ATP2A1, ATP2B1, CACNA2D3, CACNB4, CACNG5, CALM1, CALM2, CALM3, CAMK4, CAMKK2, CASQ1, CHRNA1, CORIN, DLG3, DLGAP1, DYSF, EPB41L3, ERBB4, FGF12, FGF13, FGF14, FLOT1, FLOT2, GABBR2, GABRA1, GABRA4, GABRB2, GABRG2, GABRG3, GAD1, GAD2, GJD2, GLRB, GLS, GNG3, GRIA2, GRIA3, GRIK1, GRIN2A, GRIN3A, GRIP1, GUCY1B3, HCN1, KCNA1, KCNA2, KCNA6, KCNAB1, KCNAB3, KCNB1, KCNB2, KCNC2, KCNE2, KCNF1, KCNH1, KCNH5, KCNH8, KCNIP1, KCNIP2, KCNIP4, KCNJ1, KCNJ3, KCNJ4, KCNJ5, KCNJ6, KCNJ9, KCNMA1, KCNQ3, KCNQ5, KCNS1, KCNS3, KCNV1, KRAS, LIN7A, LRRC7, LRRTM2, LRRTM3, MYL3, NCALD, NLGN4Y, NPTN, NRG1, NRGN, NRXN3, NSF, PAK1, PICK1, PLCB1, PRKAA2, PRKACB, PRKAR2B, PRKCB, PTPRD, RASGRF1, RASGRF2, RPS6KA6, RTN3, RYR2, SCN1A, SCN1B, SCN2A, SCN5A, SCN8A, SLC17A7, SLC38A1, SLC8A1, SLC8A3, SLITRK4, SNAP25, SYT1, SYT10, SYT12, SYT2, TNNT3, TRPC1

Genes involved in all enriched terms: ABCC8, ACTC1, ACTN2, ANO3, ANO7, ANO8, ASIC2, ASIC3, ATP1A1, ATP1B1, ATP2A1, ATP2B1, ATP6V1A, ATP6V1B2, ATP6V1C1, ATP6V1D, ATP6V1E1, ATP6V1G2, ATP6V1H, BEST1, BEST4, C8orf44-SGK3, CACNA2D3, CACNB4, CACNG5, CALM1, CALM2, CALM3, CAMK4, CAMKK2, CASQ1, CHRNA1, CLCN2, CORIN, DLG3, DLGAP1, DYSF, EPB41L3, ERBB4, FGF12, FGF13, FGF14, FLOT1, FLOT2, GABBR2, GABRA1, GABRA4, GABRB2, GABRG2, GABRG3, GAD1, GAD2, GJD2, GLRB, GLS, GNG3, GRIA2, GRIA3, GRIK1, GRIN2A, GRIN3A, GRIP1, GUCY1B3, HCN1, KCNA1, KCNA2, KCNA6, KCNAB1, KCNAB3, KCNB1, KCNB2, KCNC2, KCNE2, KCNF1, KCNH1, KCNH5, KCNH8, KCNIP1, KCNIP2, KCNIP4, KCNJ1, KCNJ3, KCNJ4, KCNJ5, KCNJ6, KCNJ9, KCNMA1, KCNQ3, KCNQ5, KCNS1, KCNS3, KCNV1, KRAS, LIN7A, LRRC7, LRRTM2, LRRTM3, MCOLN1, MCOLN2, MYL3, NALCN, NCALD, NLGN4Y, NPTN, NRG1, NRGN, NRXN3, NSF, PAK1, PICK1, PLCB1, PPP2CA, PPP3CA, PPP3CB, PPP3CC, PPP3R1, PRKAA2, PRKACB, PRKAR2B, PRKCB, PTPRD, RASGRF1, RASGRF2, RPS6KA6, RTN3, RYR2, SCN1A, SCN1B, SCN2A, SCN5A, SCN8A, SCNN1D, SGK2, SGK3, SLC17A7, SLC38A1, SLC8A1, SLC8A3, SLC9B2, SLITRK4, SNAP25, SYT1, SYT10, SYT12, SYT2, TNNT3, TRPC1, TRPC3, TRPC4, TRPC5, TRPV6, UNC80

Enrichment results for cluster 22:
- GSEA has 103 enriched term(s)
- ORA has 312 enriched term(s)
- 0 terms are enriched in both methods

Enrichment results for cluster 39:
- GSEA has 70 enriched term(s)
- ORA has 742 enriched term(s)
- 2 terms are enriched in both methods

ID Description p.adjust_ORA p.adjust_GSEA qvalue_ORA GeneRatio
R-HSA-9022692 Regulation of MECP2 expression and activity 0.0002994 0.046890 0.0002875 8/203
R-HSA-8986944 Transcriptional Regulation by MECP2 0.0476727 0.022074 0.0228929 8/203

Genes involved in all enriched terms: AGO1, AGO2, HDAC2, HTT, MECP2, NCOR2, PRKACA, SIN3A

Plots of the results when there are more than 5 terms in common between methods:

# Call the two plotting helpers on the 'Reactome' results for the modules in top_modules_SFARI.
# NOTE(review): plot_results() and plot_shared_genes() are defined outside this section;
# the accompanying text says plots are shown only when more than 5 terms are shared
# between methods -- confirm against their definitions.
plot_results(top_modules_enrichment, top_modules_SFARI, 'Reactome')
plot_shared_genes(top_modules_enrichment, top_modules_SFARI, 'Reactome')




——


Relaxing restrictions for clusters that weren’t enriched in any term

# Get the module (cluster) name for the clusters with numbers 20 and 22.
# `n` has to be passed by name: in slice_head() it comes after `...`, so a bare `1` is never matched to it
# (recent dplyr versions raise an error asking for `n = 1`).
selected_modules = c(genes_info %>% filter(module_number==20) %>% slice_head(n = 1) %>% pull(Module) %>% as.character,
                     genes_info %>% filter(module_number==22) %>% slice_head(n = 1) %>% pull(Module) %>% as.character)

# Enrichment analysis with a relaxed adjusted p-value cutoff (0.5) for the modules in selected_modules.
#
# If the final file with the shared enrichment exists, the cached GSEA, ORA and shared results are loaded.
# Otherwise GSEA and ORA are run for every selected module in five databases (GO, DO, DGN, KEGG and Reactome),
# the terms found by both methods are collected in selected_modules_enrichment, and the result is saved.
if(file.exists('./../Data/preprocessedData/top_modules_enrichment_relaxed.RData')){
  load('./../Data/preprocessedData/top_modules_enrichment_relaxed.RData')
  load('./../Data/preprocessedData/GSEA_results_relaxed.RData')
  load('./../Data/preprocessedData/ORA_results_relaxed.RData')
} else{

  pvalueCutoff = 0.5
  ################################################################################################################
  # Prepare dataset for Enrichment Analysis
  
  EA_dataset = genes_info %>% dplyr::rename('ensembl_gene_id' = ID) %>% filter(Module!='gray')
  
  # clusterProfiler works with Entrez Gene Ids, so we have to assign one to each gene
  getinfo = c('ensembl_gene_id','entrezgene')
  mart=useMart(biomart='ENSEMBL_MART_ENSEMBL',dataset='hsapiens_gene_ensembl',host='feb2014.archive.ensembl.org')
  biomart_output = getBM(attributes=getinfo, filters=c('ensembl_gene_id'), 
                         values=EA_dataset$ensembl_gene_id, mart=mart)
  
  # Keep a single row per Entrez Id
  # NOTE(review): genes without an Entrez Id (NA) are presumably collapsed into one row here and stay in the
  # gene list and in the universe — confirm whether they should be dropped
  EA_dataset = biomart_output %>% left_join(EA_dataset, by='ensembl_gene_id') %>% 
               dplyr::rename('ID'=ensembl_gene_id) %>% distinct(entrezgene, .keep_all = TRUE)
  
  rm(getinfo, mart, biomart_output)
  
  ################################################################################################################
  # GSEA enrichment
  
  file_name = './../Data/preprocessedData/GSEA_results_relaxed.RData'
  
  # The results are saved after each module, so an existing file may come from an interrupted run:
  # load whatever is cached and compute only the modules that are still missing
  GSEA_enrichment = list()
  if(file.exists(file_name)) load(file_name)
  pending_modules = setdiff(selected_modules, names(GSEA_enrichment))
  
  if(length(pending_modules) > 0){
    cat('\n\nPerforming GSEA\n')
    
    nPerm = 1e5
    GSEA_dataset = EA_dataset %>% dplyr::select(ID, entrezgene, contains('MM.'))
    
    for(module in pending_modules){
      
      cat(paste0('\nModule: ', which(selected_modules == module), '/', length(selected_modules)))
      
      # Genes ranked by the 'MM.' column of the module (the module name without its first character)
      geneList = GSEA_dataset %>% pull(paste0('MM.',substring(module,2)))
      names(geneList) = GSEA_dataset %>% pull(entrezgene) %>% as.character
      geneList = sort(geneList, decreasing = TRUE)
      
      GSEA_GO = gseGO(geneList, OrgDb = org.Hs.eg.db, pAdjustMethod = 'bonferroni', pvalueCutoff = pvalueCutoff, 
                      nPerm = nPerm, verbose = FALSE, seed = TRUE)
      
      GSEA_DO = gseDO(geneList, pAdjustMethod = 'bonferroni', pvalueCutoff = pvalueCutoff, 
                      nPerm = nPerm, verbose = FALSE, seed = TRUE)
      
      GSEA_DGN = gseDGN(geneList, pAdjustMethod = 'bonferroni', pvalueCutoff = pvalueCutoff, 
                        nPerm = nPerm, verbose = FALSE, seed = TRUE)
      
      GSEA_KEGG = gseKEGG(geneList, organism = 'human', pAdjustMethod = 'bonferroni', pvalueCutoff = pvalueCutoff, 
                          nPerm = nPerm, verbose = FALSE, seed = TRUE)
      
      GSEA_Reactome = gsePathway(geneList, organism = 'human', pAdjustMethod = 'bonferroni', pvalueCutoff = pvalueCutoff, 
                                 nPerm = nPerm, verbose = FALSE, seed = TRUE)
      
      GSEA_enrichment[[module]] = list('GO' = GSEA_GO, 'DO' = GSEA_DO, 'DGN' = GSEA_DGN, 'KEGG' = GSEA_KEGG, 
                                       'Reactome' = GSEA_Reactome)
      
      # Save after each iteration (in case it breaks)
      save(GSEA_enrichment, file = file_name)
    }
    
    rm(GSEA_dataset, nPerm, geneList, GSEA_GO, GSEA_DO, GSEA_DGN, GSEA_KEGG, GSEA_Reactome)
    
  }
  
  ################################################################################################################
  # ORA enrichment
  
  file_name = './../Data/preprocessedData/ORA_results_relaxed.RData'
  
  # Same resume logic as for GSEA: reuse the cached modules and compute only the missing ones
  ORA_enrichment = list()
  if(file.exists(file_name)) load(file_name)
  pending_modules = setdiff(selected_modules, names(ORA_enrichment))
  
  if(length(pending_modules) > 0){
    cat('\n\nPerforming ORA\n')
    
    # Prepare input: the background is every gene left in EA_dataset
    universe = EA_dataset$entrezgene %>% as.character
    
    for(module in pending_modules){
      
      cat(paste0('\nModule: ', which(selected_modules == module), '/', length(selected_modules)))
      
      genes_in_module = EA_dataset %>% filter(Module == module) %>% pull(entrezgene)
      
      # qvalueCutoff = 1 so that terms are only filtered by pvalueCutoff
      ORA_GO = enrichGO(gene = genes_in_module, universe = universe, OrgDb = org.Hs.eg.db, ont = 'All', 
                        pAdjustMethod = 'bonferroni', pvalueCutoff = pvalueCutoff, qvalueCutoff = 1)
      
      ORA_DO = enrichDO(gene = genes_in_module, universe = universe, qvalueCutoff = 1,
                        pAdjustMethod = 'bonferroni', pvalueCutoff = pvalueCutoff)
      
      ORA_DGN = enrichDGN(gene = genes_in_module, universe = universe, qvalueCutoff = 1,
                          pAdjustMethod = 'bonferroni', pvalueCutoff = pvalueCutoff)
      
      ORA_KEGG = enrichKEGG(gene = genes_in_module, universe = universe, qvalueCutoff = 1,
                            pAdjustMethod = 'bonferroni', pvalueCutoff = pvalueCutoff) 
      
      ORA_Reactome = enrichPathway(gene = genes_in_module, universe = universe, qvalueCutoff = 1,
                                   pAdjustMethod = 'bonferroni', pvalueCutoff = pvalueCutoff)
      
      ORA_enrichment[[module]] = list('GO' = ORA_GO, 'DO' = ORA_DO, 'DGN' = ORA_DGN, 'KEGG' = ORA_KEGG, 
                                      'Reactome' = ORA_Reactome)
      
      # Save after each iteration (in case it breaks)
      save(ORA_enrichment, file = file_name)
    }
    
    rm(universe, genes_in_module, module, ORA_GO, ORA_DGN, ORA_DO, ORA_KEGG, ORA_Reactome)
  
  }
  
  ################################################################################################################
  # Get shared enrichment for each module
  
  selected_modules_enrichment = list()
  
  for(module in selected_modules){
    
    module_enrichment = list()
    GSEA_enrichment_for_module = GSEA_enrichment[[module]]
    ORA_enrichment_for_module = ORA_enrichment[[module]]
    
    for(dataset in c('KEGG', 'Reactome', 'GO', 'DO', 'DGN')){
      
      # Tag the significance columns with the method they come from before joining the two tables
      GSEA_enrichment_dataset = GSEA_enrichment_for_module[[dataset]] %>% data.frame %>%
        dplyr::rename('pvalue_GSEA' = pvalue, 'p.adjust_GSEA' = p.adjust, 'qvalues_GSEA' = qvalues)
      
      ORA_enrichment_dataset = ORA_enrichment_for_module[[dataset]] %>% data.frame %>%
        dplyr::rename('pvalue_ORA' = pvalue, 'p.adjust_ORA' = p.adjust, 'qvalue_ORA' = qvalue)
      
      # Get shared enrichments (if any)
      shared_enrichment_dataset = GSEA_enrichment_dataset %>% inner_join(ORA_enrichment_dataset, by = 'ID')
      
      module_enrichment[[dataset]] = shared_enrichment_dataset
    }
    
    selected_modules_enrichment[[module]] = module_enrichment
  }
  
  save(selected_modules_enrichment, file = './../Data/preprocessedData/top_modules_enrichment_relaxed.RData')
  
  rm(module, module_enrichment, GSEA_enrichment_for_module, ORA_enrichment_for_module, dataset, 
     GSEA_enrichment_dataset, ORA_enrichment_dataset, shared_enrichment_dataset, pending_modules)
}

Relaxing the p-value threshold only worked for one of the modules (cluster 22); the other one (cluster 20) still has no terms in common between the GSEA and ORA results.


Relaxed enrichment for cluster 22


Gene Ontology

# GO terms found by both GSEA and ORA for the second selected module (module_number 22)
compare_methods(GSEA_enrichment, ORA_enrichment, selected_modules_enrichment, selected_modules[2], 'GO')

Enrichment results for cluster 22:
- GSEA has 409 enriched term(s)
- ORA has 10 enriched term(s)
- 1 terms are enriched in both methods

ID Description p.adjust_ORA p.adjust_GSEA qvalue_ORA GeneRatio
GO:0043409 negative regulation of MAPK cascade 0.0598975 0.3227047 0.0183522 6/59

Genes involved in all enriched terms: DUSP4, DUSP6, ERRFI1, NCOR1, SPRED2, SPRY2

Disease Ontology

# DO terms found by both GSEA and ORA for the second selected module (module_number 22)
compare_methods(GSEA_enrichment, ORA_enrichment, selected_modules_enrichment, selected_modules[2], 'DO')

Enrichment results for cluster 22:
- GSEA has 147 enriched term(s)
- ORA has 146 enriched term(s)
- 0 terms are enriched in both methods


Disease Gene Network

# DGN terms found by both GSEA and ORA for the second selected module (module_number 22)
compare_methods(GSEA_enrichment, ORA_enrichment, selected_modules_enrichment, selected_modules[2], 'DGN')

Enrichment results for cluster 22:
- GSEA has 219 enriched term(s)
- ORA has 711 enriched term(s)
- 1 terms are enriched in both methods

ID Description p.adjust_ORA p.adjust_GSEA qvalue_ORA GeneRatio
umls:C0740392 Infarction, Middle Cerebral Artery 0.3033946 0.3073569 0.1428374 4/58

Genes involved in all enriched terms: ARC, EGR1, EGR2, EGR4

KEGG

# KEGG terms found by both GSEA and ORA for the second selected module (module_number 22)
compare_methods(GSEA_enrichment, ORA_enrichment, selected_modules_enrichment, selected_modules[2], 'KEGG')

Enrichment results for cluster 22:
- GSEA has 64 enriched term(s)
- ORA has 60 enriched term(s)
- 1 terms are enriched in both methods

ID Description p.adjust_ORA p.adjust_GSEA qvalue_ORA GeneRatio
hsa05202 Transcriptional misregulation in cancer 0.4564711 0.0045094 0.4484628 4/32

Genes involved in all enriched terms: DUSP6, NCOR1, NR4A3, PER2

Reactome

# Reactome terms found by both GSEA and ORA for the second selected module (module_number 22)
compare_methods(GSEA_enrichment, ORA_enrichment, selected_modules_enrichment, selected_modules[2], 'Reactome')

Enrichment results for cluster 22:
- GSEA has 137 enriched term(s)
- ORA has 312 enriched term(s)
- 2 terms are enriched in both methods

ID Description p.adjust_ORA p.adjust_GSEA qvalue_ORA GeneRatio
R-HSA-3247509 Chromatin modifying enzymes 0.456008 0.0171148 0.0946049 6/43
R-HSA-4839726 Chromatin organization 0.456008 0.0171148 0.0946049 6/43

Genes involved in all enriched terms: KDM5B, MTA2, NCOA2, NCOR1, TRRAP, WHSC1L1

Plots of the results when there are more than 5 terms in common between methods:

# Plot the relaxed Reactome results of the second selected module (module_number 22)
# NOTE(review): only 2 Reactome terms are shared for this module, so with the '5 terms' threshold mentioned
# in the text above these calls presumably draw nothing — confirm
plot_results(selected_modules_enrichment, selected_modules[2], 'Reactome')
plot_shared_genes(selected_modules_enrichment, selected_modules[2], 'Reactome')



——


GSEA and ORA top enrichment results for cluster 20


Note: I am using a corrected p-value threshold of 0.05 here, since the threshold was only relaxed in order to combine the GSEA results with the ones from the ORA

# Load the GSEA and ORA results saved without the '_relaxed' suffix
# NOTE(review): presumably these files restore the objects GSEA_enrichment and ORA_enrichment, replacing the
# relaxed versions used above — confirm the object names stored in the files
load('./../Data/preprocessedData/GSEA_results.RData')
load('./../Data/preprocessedData/ORA_results.RData')

# Print the top GSEA results of a module for each annotation database
#
# module: name of the module, used to index the global list GSEA_enrichment
# n:      number of terms to show per database (more can be returned if there are ties in NES)
#
# For each database, keeps the terms with adjusted p-value < 0.05 and positive NES and prints the n terms
# with the highest NES as a table, or a message when no term passes the filters
print_GSEA_top_results = function(module, n){
  
  for(database in c('GO','DO','DGN','KEGG','Reactome')){
      # slice_max keeps the n highest NES values (ties included), sorted in decreasing order
      res = GSEA_enrichment[[module]][[database]]@result %>% filter(p.adjust<0.05 & NES>0) %>%
            dplyr::select(ID, Description, NES, p.adjust, qvalues) %>% slice_max(NES, n = n)
      
      cat(paste0('\n',database,':\n'))
      
      if(nrow(res)>0){
        # The row names of the results table repeat the term ID: omit them so the ID is not printed twice
        print(res %>% kable(row.names = FALSE) %>% kable_styling(full_width = FALSE))
        #print(xtable(res, display =c('s','s','s','f','e','e')), include.rownames=FALSE) # thesis
      } else { 
        cat('\nNo enriched terms found\n\n\n')
      }
    
  }
}

# Plot how many genes the top GSEA terms of a module have in common, one plot per annotation database
#
# module: name of the module, used to index the global list GSEA_enrichment
# n:      maximum number of terms (ordered by decreasing NES) to compare per database
#
# The value plotted for each pair of terms is the number of core enrichment genes of the first term that
# are also in the second one, divided by the number of distinct genes in both terms.
# Databases with fewer than two terms with adjusted p-value < 0.05 and positive NES are skipped
plot_shared_genes_GSEA = function(module, n){
    for(database in c('GO','DO','DGN','KEGG','Reactome')){
      top_terms = GSEA_enrichment[[module]][[database]]@result %>% filter(p.adjust<0.05 & NES>0) %>%
                  arrange(desc(NES)) %>% dplyr::select(ID, core_enrichment) %>% slice_head(n=n)
      n_terms = nrow(top_terms)
      
      # At least two terms are needed to compare their genes
      if(n_terms<=1) next
      
      # Genes of each term ('/' separated in core_enrichment)
      core_genes = strsplit(top_terms$core_enrichment, '/')
      
      overlap = matrix(0, n_terms, n_terms, dimnames = list(top_terms$ID, top_terms$ID))
      for(a in seq_len(n_terms-1)){
        for(b in seq(a+1, n_terms)){
          genes_a = core_genes[[a]]
          genes_b = core_genes[[b]]
          n_common = sum(genes_a %in% genes_b)
          n_total = length(unique(c(genes_a, genes_b)))
          overlap[a,b] = n_common/n_total
          overlap[b,a] = overlap[a,b]
        }
      }
      
      cluster_number = genes_info$module_number[genes_info$Module==module][1]
      plot_title = paste0('Genes in common in the ',database, ' database for cluster ', cluster_number)
      
      corrplot(overlap, type = 'lower', method = 'square', diag = FALSE, number.digits = 2, cl.pos = 'n', 
               tl.pos = 'ld', tl.col = '#666666', order = 'hclust', col.lim = c(0,1), addCoef.col = 'black',
               mar = c(0,0,2,0), tl.cex = 0.8, number.cex= 0.8, title = plot_title)
  }
}

# Print the top ORA results of a module for each annotation database
#
# module: name of the module, used to index the global list ORA_enrichment
# n:      number of terms to show per database (more can be returned if there are ties in p.adjust)
#
# For each database, keeps the terms with adjusted p-value < 0.05 and prints the n most significant ones
# as a table, or a message when no term passes the filter
print_ORA_top_results = function(module, n){
  
  for(database in c('GO','DO','DGN','KEGG','Reactome')){
      # The top terms are the ones with the SMALLEST adjusted p-value, so slice_min is used:
      # top_n(n, wt=p.adjust) would keep the n largest p-values (the least significant terms)
      res = ORA_enrichment[[module]][[database]]@result %>% filter(p.adjust<0.05) %>%
            dplyr::select(ID, Description, p.adjust, qvalue, GeneRatio) %>% 
            slice_min(p.adjust, n = n)
      
      cat(paste0('\n',database,':\n'))
      
      if(nrow(res)>0){
        # The row names of the results table repeat the term ID: omit them so the ID is not printed twice
        print(res %>% kable(row.names = FALSE) %>% kable_styling(full_width = FALSE))
        #print(xtable(res, display =c('s','s','s','e','e','s')), include.rownames=FALSE) # thesis
      } else { 
        cat('\nNo enriched terms found\n\n\n')
      }
    
  }
}

# Plot how many genes the top ORA terms of a module have in common, one plot per annotation database
#
# module: name of the module, used to index the global list ORA_enrichment
# n:      maximum number of terms (the most significant ones) to compare per database
#
# The value plotted for each pair of terms is the number of genes of the first term that are also in the
# second one, divided by the number of distinct genes in both terms.
# Databases with fewer than two terms with adjusted p-value < 0.05 are skipped
plot_shared_genes_ORA = function(module, n){
    for(database in c('GO','DO','DGN','KEGG','Reactome')){
      # Most significant terms first (increasing adjusted p-value), so slice_head keeps the top n
      plot_data = ORA_enrichment[[module]][[database]]@result %>% filter(p.adjust<0.05) %>%
                  arrange(p.adjust) %>% dplyr::select(ID, geneID) %>% slice_head(n=n)
      
      if(nrow(plot_data)>1){
    
      # ORA results store the genes of each term in geneID, separated by '/'
      # (core_enrichment only exists in GSEA results)
      gene_sets = strsplit(as.character(plot_data$geneID), '/')
      
      shared_genes = matrix(0, nrow(plot_data), nrow(plot_data))
      for(i in 1:(nrow(plot_data)-1)){
        for(j in (i+1):nrow(plot_data)){
          gene_set_1 = gene_sets[[i]]
          gene_set_2 = gene_sets[[j]]
          shared_genes[i,j] = sum(gene_set_1 %in% gene_set_2)/length(unique(c(gene_set_1, gene_set_2)))
          shared_genes[j,i] = shared_genes[i,j]
        }
      }
      rownames(shared_genes) = plot_data$ID
      colnames(shared_genes) = plot_data$ID
  
      corrplot(shared_genes, type = 'lower', method = 'square', diag = FALSE, number.digits = 2, cl.pos = 'n', 
               tl.pos = 'ld', tl.col = '#666666', order = 'hclust', col.lim = c(0,1), addCoef.col = 'black',
               mar = c(0,0,2,0), tl.cex = 0.8, number.cex= 0.8,
               title = paste0('Genes in common in the ',database, ' database for cluster ',
                              genes_info$module_number[genes_info$Module==module][1]))

      }
  }
}


GSEA enrichment results

# Top 5 GSEA terms per database for the first selected module (module_number 20)
print_GSEA_top_results(selected_modules[1], 5)

GO:

No enriched terms found

DO:
ID Description NES p.adjust qvalues
DOID:1826 DOID:1826 epilepsy syndrome 2.149075 0.0136879 0.0002301

DGN:

No enriched terms found

KEGG:
ID Description NES p.adjust qvalues
hsa00190 hsa00190 Oxidative phosphorylation 2.857289 0.0059279 0.0002276
hsa05016 hsa05016 Huntington disease 2.613427 0.0055894 0.0002276
hsa00020 hsa00020 Citrate cycle (TCA cycle) 2.563582 0.0063893 0.0002276
hsa05012 hsa05012 Parkinson disease 2.471158 0.0056574 0.0002276
hsa05415 hsa05415 Diabetic cardiomyopathy 2.431780 0.0057627 0.0002276
Reactome:
ID Description NES p.adjust qvalues
R-HSA-1428517 R-HSA-1428517 The citric acid (TCA) cycle and respiratory electron transport 2.917908 0.0235084 0.0003226
R-HSA-611105 R-HSA-611105 Respiratory electron transport 2.876693 0.0242372 0.0003226
R-HSA-163200 R-HSA-163200 Respiratory electron transport, ATP synthesis by chemiosmotic coupling, and heat production by uncoupling proteins. 2.821164 0.0240467 0.0003226
R-HSA-5576892 R-HSA-5576892 Phase 0 - rapid depolarisation 2.660878 0.0252349 0.0003226
R-HSA-888590 R-HSA-888590 GABA synthesis, release, reuptake and degradation 2.627300 0.0261688 0.0003226


Plots of the genes shared between the top GSEA terms, for the databases with more than one enriched term:

# Genes shared between the top 5 GSEA terms of the first selected module (module_number 20), per database
plot_shared_genes_GSEA(selected_modules[1], 5)



ORA enrichment results

# Top 5 ORA terms per database for the first selected module (module_number 20)
print_ORA_top_results(selected_modules[1], 5)
GO:
ID Description p.adjust qvalue GeneRatio
GO:0070603 GO:0070603 SWI/SNF superfamily-type complex 0.0472059 0.0466179 10/499

DO:

No enriched terms found

DGN:

No enriched terms found

KEGG:

No enriched terms found

Reactome:

No enriched terms found





Session info

# Record the R version, platform and package versions used to generate this report
sessionInfo()
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.5 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
## 
## locale:
##  [1] LC_CTYPE=en_GB.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_GB.UTF-8        LC_COLLATE=en_GB.UTF-8    
##  [5] LC_MONETARY=en_GB.UTF-8    LC_MESSAGES=en_GB.UTF-8   
##  [7] LC_PAPER=en_GB.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] xtable_1.8-4           kableExtra_1.1.0       knitr_1.32            
##  [4] doParallel_1.0.15      iterators_1.0.13       foreach_1.5.1         
##  [7] org.Hs.eg.db_3.8.2     AnnotationDbi_1.46.1   IRanges_2.18.3        
## [10] S4Vectors_0.22.1       Biobase_2.44.0         BiocGenerics_0.30.0   
## [13] DOSE_3.10.2            ReactomePA_1.28.0      clusterProfiler_3.12.0
## [16] biomaRt_2.40.5         polycor_0.7-10         expss_0.10.7          
## [19] WGCNA_1.69             fastcluster_1.2.3      dynamicTreeCut_1.63-1 
## [22] ggExtra_0.9            ggpubr_0.2.5           magrittr_2.0.1        
## [25] GGally_1.5.0           corrplot_0.90          colorspace_2.0-2      
## [28] gridExtra_2.3          viridis_0.6.1          viridisLite_0.4.0     
## [31] RColorBrewer_1.1-2     dendextend_1.15.1      plotly_4.9.2          
## [34] glue_1.4.2             reshape2_1.4.4         forcats_0.5.0         
## [37] stringr_1.4.0          dplyr_1.0.1            purrr_0.3.4           
## [40] readr_1.3.1            tidyr_1.1.0            tibble_3.1.2          
## [43] ggplot2_3.3.5          tidyverse_1.3.0       
## 
## loaded via a namespace (and not attached):
##   [1] utf8_1.2.2                  tidyselect_1.1.1           
##   [3] RSQLite_2.2.0               htmlwidgets_1.5.3          
##   [5] grid_3.6.3                  BiocParallel_1.18.1        
##   [7] munsell_0.5.0               codetools_0.2-16           
##   [9] preprocessCore_1.46.0       miniUI_0.1.1.1             
##  [11] withr_2.4.2                 GOSemSim_2.10.0            
##  [13] highr_0.9                   rstudioapi_0.13            
##  [15] ggsignif_0.6.2              labeling_0.4.2             
##  [17] urltools_1.7.3              GenomeInfoDbData_1.2.1     
##  [19] polyclip_1.10-0             bit64_4.0.5                
##  [21] farver_2.1.0                vctrs_0.3.8                
##  [23] generics_0.1.0              xfun_0.25                  
##  [25] GenomeInfoDb_1.20.0         R6_2.5.1                   
##  [27] graphlayouts_0.7.0          locfit_1.5-9.4             
##  [29] DelayedArray_0.10.0         bitops_1.0-7               
##  [31] cachem_1.0.6                reshape_0.8.8              
##  [33] fgsea_1.10.1                gridGraphics_0.5-1         
##  [35] assertthat_0.2.1            promises_1.2.0.1           
##  [37] scales_1.1.1                ggraph_2.0.3               
##  [39] nnet_7.3-14                 enrichplot_1.4.0           
##  [41] gtable_0.3.0                tidygraph_1.2.0            
##  [43] rlang_0.4.11                genefilter_1.66.0          
##  [45] splines_3.6.3               lazyeval_0.2.2             
##  [47] acepack_1.4.1               impute_1.58.0              
##  [49] broom_0.7.0                 europepmc_0.4              
##  [51] checkmate_2.0.0             BiocManager_1.30.16        
##  [53] yaml_2.2.1                  modelr_0.1.6               
##  [55] crosstalk_1.1.1             backports_1.2.1            
##  [57] httpuv_1.6.1                qvalue_2.16.0              
##  [59] Hmisc_4.4-0                 tools_3.6.3                
##  [61] ggplotify_0.1.0             ellipsis_0.3.2             
##  [63] jquerylib_0.1.4             ggridges_0.5.3             
##  [65] Rcpp_1.0.7                  plyr_1.8.6                 
##  [67] zlibbioc_1.30.0             base64enc_0.1-3            
##  [69] progress_1.2.2              RCurl_1.98-1.4             
##  [71] prettyunits_1.1.1           rpart_4.1-15               
##  [73] cowplot_1.1.1               SummarizedExperiment_1.14.1
##  [75] haven_2.2.0                 ggrepel_0.9.1              
##  [77] cluster_2.1.0               fs_1.5.0                   
##  [79] data.table_1.14.0           DO.db_2.9                  
##  [81] reactome.db_1.68.0          triebeard_0.3.0            
##  [83] reprex_0.3.0                matrixStats_0.60.1         
##  [85] hms_1.1.0                   mime_0.11                  
##  [87] evaluate_0.14               XML_3.99-0.3               
##  [89] jpeg_0.1-9                  readxl_1.3.1               
##  [91] compiler_3.6.3              crayon_1.4.1               
##  [93] htmltools_0.5.2             later_1.3.0                
##  [95] Formula_1.2-4               geneplotter_1.62.0         
##  [97] lubridate_1.7.10            DBI_1.1.1                  
##  [99] tweenr_1.0.2                dbplyr_1.4.2               
## [101] rappdirs_0.3.3              MASS_7.3-53                
## [103] Matrix_1.2-18               cli_3.0.1                  
## [105] igraph_1.2.6                GenomicRanges_1.36.1       
## [107] pkgconfig_2.0.3             rvcheck_0.1.8              
## [109] foreign_0.8-76              xml2_1.3.2                 
## [111] annotate_1.62.0             bslib_0.3.0                
## [113] XVector_0.24.0              webshot_0.5.2              
## [115] rvest_0.3.5                 yulab.utils_0.0.2          
## [117] digest_0.6.27               graph_1.62.0               
## [119] rmarkdown_2.7               cellranger_1.1.0           
## [121] fastmatch_1.1-3             htmlTable_1.13.3           
## [123] curl_4.3.2                  shiny_1.6.0                
## [125] graphite_1.30.0             lifecycle_1.0.0            
## [127] jsonlite_1.7.2              fansi_0.5.0                
## [129] pillar_1.6.2                lattice_0.20-41            
## [131] fastmap_1.1.0               httr_1.4.2                 
## [133] survival_3.2-7              GO.db_3.8.2                
## [135] UpSetR_1.4.0                png_0.1-7                  
## [137] bit_4.0.4                   ggforce_0.3.1              
## [139] stringi_1.7.4               sass_0.4.0                 
## [141] blob_1.2.2                  DESeq2_1.24.0              
## [143] latticeExtra_0.6-29         memoise_2.0.0